91 files changed, 3234 insertions, 1807 deletions
diff --git a/.circleci/config.yml b/.circleci/config.yml
index f4ffb5223..f4536cca5 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -8,7 +8,7 @@ jobs:
     docker:
       # CircleCI maintains a library of pre-built images
       # documented at https://circleci.com/docs/2.0/circleci-images/
-      - image: circleci/python:3.6.6
+      - image: circleci/python:3.8.4
 
     working_directory: ~/repo
 
@@ -18,20 +18,19 @@ jobs:
       - run:
           name: create virtual environment, install dependencies
           command: |
-            python3 -m venv venv
-            ln -s $(which python3) venv/bin/python3.6
-            . venv/bin/activate
             sudo apt-get update
             sudo apt-get install -y graphviz texlive-fonts-recommended texlive-latex-recommended texlive-latex-extra texlive-generic-extra latexmk texlive-xetex
+            python3.8 -m venv venv
+            . venv/bin/activate
 
       - run:
           name: build numpy
           command: |
             . venv/bin/activate
-            pip install --upgrade pip 'setuptools<49.2.0'
-            pip install -r test_requirements.txt
+            pip install --progress-bar=off --upgrade pip 'setuptools<49.2.0'
+            pip install --progress-bar=off -r test_requirements.txt
             pip install .
-            pip install -r doc_requirements.txt
+            pip install --progress-bar=off -r doc_requirements.txt
 
       - run:
           name: create release notes
diff --git a/.gitattributes b/.gitattributes
index dad6dde37..ad7d3b227 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -3,3 +3,18 @@ numpy/lib/tests/data/*.npy binary
 
 # Release notes, reduce number of conflicts.
 doc/release/*.rst merge=union
+
+# Highlight our custom templating language as C, since it's hopefully better
+# than nothing. This also affects repo statistics.
+*.c.src linguist-language=C
+*.h.src linguist-language=C
+
+# Mark some files as vendored
+numpy/linalg/lapack_lite/f2c.c linguist-vendored
+numpy/linalg/lapack_lite/f2c.h linguist-vendored
+tools/npy_tempita/* linguist-vendored
+
+# Mark some files as generated
+numpy/linalg/lapack_lite/f2c_*.c linguist-generated
+numpy/linalg/lapack_lite/lapack_lite_names.h linguist-generated
+
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE/bug-report.md
index 3a25eeb1e..d2df08689 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@@ -1,3 +1,8 @@
+---
+name: "Bug Report"
+about: Submit a bug report to help us improve NumPy
+
+---
 
 <!-- Please describe the issue in detail here, and fill in the fields below -->
 
@@ -11,8 +16,6 @@ import numpy as np
 << your code here >>
 ```
 
-<!-- Remove these sections for a feature request -->
-
 ### Error message:
 
 <!-- If you are reporting a segfault please include a GDB traceback, which you
@@ -21,7 +24,7 @@ https://github.com/numpy/numpy/blob/master/doc/source/dev/development_environmen
 
 <!-- Full error message, if any (starting from line Traceback: ...) -->
 
-### Numpy/Python version information:
+### NumPy/Python version information:
 
 <!-- Output from 'import sys, numpy; print(numpy.__version__, sys.version)' -->
 
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 000000000..adfff81bd
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,7 @@
+contact_links:
+  - name: Question/Help/Support
+    url: https://numpy.org/gethelp/
+    about: "If you have a question, please look at the listed resources available on the website."
+  - name: Development-related matters
+    url: https://numpy.org/community/
+    about: "If you would like to discuss development-related matters or need help from the NumPy team, see our community's communication channels."
diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md
new file mode 100644
index 000000000..cdb7cde2e
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/documentation.md
@@ -0,0 +1,20 @@
+---
+name: "Documentation"
+about: Report an issue related to the NumPy documentation
+labels: 04 - Documentation
+
+---
+
+## Documentation
+
+<!-- If this is an issue with the current documentation for NumPy (e.g.
+incomplete/inaccurate docstring, unclear explanation in any part of the
+documentation), make sure to leave a reference to the document/code you're
+referring to. You can also check the development version of the documentation
+and see if this issue has already been addressed: https://numpy.org/devdocs/
+-->
+
+<!-- If this is an idea or a request for content, please describe as clearly as
+possible what topics you think are missing from the current documentation. Make
+sure to check https://github.com/numpy/numpy-tutorials and see if this issue
+might be more appropriate there. -->
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md
new file mode 100644
index 000000000..0be94f928
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,16 @@
+---
+name: "Feature Request"
+about: Check instructions for submitting your idea on the mailing list first.
+
+---
+
+## Feature
+
+<!-- If you're looking to request a new feature or change in functionality, including
+adding or changing the meaning of arguments to an existing function, please
+post your idea on the [numpy-discussion mailing list]
+(https://mail.python.org/mailman/listinfo/numpy-discussion) to explain your
+reasoning in addition to opening an issue or pull request. You can also check
+out our [Contributor Guide]
+(https://github.com/numpy/numpy/blob/master/doc/source/dev/index.rst) if you
+need more information. -->
diff --git a/.github/ISSUE_TEMPLATE/post-install.md b/.github/ISSUE_TEMPLATE/post-install.md
new file mode 100644
index 000000000..c0ec7896a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/post-install.md
@@ -0,0 +1,21 @@
+---
+name: "Post-install/importing issue"
+about: If you have trouble importing or using NumPy after installation
+labels: 32 - Installation
+
+---
+
+<!-- Please describe the issue in detail here, and fill in the fields below. Also, check our Troubleshooting ImportError document to see if your issue is listed there: https://numpy.org/devdocs/user/troubleshooting-importerror.html -->
+
+### Steps to reproduce:
+
+<!-- Please describe the installation method (e.g. building from source, Anaconda, pip), your OS and NumPy/Python version information -->
+
+### Error message:
+
+<!-- If you are reporting a segfault please include a GDB traceback, which you
+can generate by following
+https://github.com/numpy/numpy/blob/master/doc/source/dev/development_environment.rst#debugging -->
+
+<!-- Full error message, if any (starting from line Traceback: ...) -->
+
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index e12eea7bd..528580a8e 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -5,3 +5,7 @@ http://www.numpy.org/devdocs/dev/development_workflow.html
 <!-- We'd appreciate it if your commit message is properly formatted
 http://www.numpy.org/devdocs/dev/development_workflow.html#writing-the-commit-message
 -->
+
+<!-- If you're submitting a new feature or substantial change in functionality,
+make sure you discuss your changes on the numpy-discussion mailing list first:
+https://mail.python.org/mailman/listinfo/numpy-discussion -->
diff --git a/.gitmodules b/.gitmodules
index b1e13c3bc..e69de29bb 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "doc/sphinxext"]
-	path = doc/sphinxext
-	url = https://github.com/numpy/numpydoc.git
diff --git a/.mailmap b/.mailmap
index ae221c020..5b3415955 100644
--- a/.mailmap
+++ b/.mailmap
@@ -232,6 +232,7 @@ Shota Kawabuchi <shota.kawabuchi+GitHub@gmail.com> skwbc <shota.kawabuchi+GitHub
 Siavash Eliasi <siavashserver@gmail.com> siavashserver <siavashserver@gmail.com>
 Simon Gasse <simon.gasse@gmail.com> sgasse <sgasse@users.noreply.github.com>
 Søren Rasmussen <soren.rasmussen@alexandra.dk> sorenrasmussenai <47032123+sorenrasmussenai@users.noreply.github.com>
+Stefan Behnel <stefan_ml@behnel.de> scoder <stefan_ml@behnel.de>
 Stefan van der Walt <stefanv@berkeley.edu> Stefan van der Walt <sjvdwalt@gmail.com>
 Stefan van der Walt <stefanv@berkeley.edu> Stefan van der Walt <stefan@sun.ac.za>
 Stephan Hoyer <shoyer@gmail.com> Stephan Hoyer <shoyer@climate.com>
diff --git a/doc/DISTUTILS.rst.txt b/doc/DISTUTILS.rst.txt
index 01527374d..f1f270462 100644
--- a/doc/DISTUTILS.rst.txt
+++ b/doc/DISTUTILS.rst.txt
@@ -587,10 +587,6 @@ The header of a typical SciPy ``__init__.py`` is::
   test = Tester().test
   bench = Tester().bench
 
-Note that NumPy submodules still use a file named ``info.py`` in which the
-module docstring and ``__all__`` dict are defined.  These files will be removed
-at some point.
-
 Extra features in NumPy Distutils
 '''''''''''''''''''''''''''''''''
 
diff --git a/doc/changelog/1.19.2-changelog.rst b/doc/changelog/1.19.2-changelog.rst
new file mode 100644
index 000000000..47db1dd59
--- /dev/null
+++ b/doc/changelog/1.19.2-changelog.rst
@@ -0,0 +1,30 @@
+
+Contributors
+============
+
+A total of 8 people contributed to this release.  People with a "+" by their
+names contributed a patch for the first time.
+
+* Charles Harris
+* Matti Picus
+* Pauli Virtanen
+* Philippe Ombredanne +
+* Sebastian Berg
+* Stefan Behnel +
+* Stephan Loyd +
+* Zac Hatfield-Dodds
+
+Pull requests merged
+====================
+
+A total of 9 pull requests were merged for this release.
+
+* `#16959 <https://github.com/numpy/numpy/pull/16959>`__: TST: Change aarch64 to arm64 in travis.yml.
+* `#16998 <https://github.com/numpy/numpy/pull/16998>`__: MAINT: Configure hypothesis in ``np.test()`` for determinism,...
+* `#17000 <https://github.com/numpy/numpy/pull/17000>`__: BLD: pin setuptools < 49.2.0
+* `#17015 <https://github.com/numpy/numpy/pull/17015>`__: ENH: Add NumPy declarations to be used by Cython 3.0+
+* `#17125 <https://github.com/numpy/numpy/pull/17125>`__: BUG: Remove non-threadsafe sigint handling from fft calculation
+* `#17243 <https://github.com/numpy/numpy/pull/17243>`__: BUG: core: fix ilp64 blas dot/vdot/... for strides > int32 max
+* `#17244 <https://github.com/numpy/numpy/pull/17244>`__: DOC: Use SPDX license expressions with correct license
+* `#17245 <https://github.com/numpy/numpy/pull/17245>`__: DOC: Fix the link to the quick-start in the old API functions
+* `#17272 <https://github.com/numpy/numpy/pull/17272>`__: BUG: fix pickling of arrays larger than 2GiB
diff --git a/doc/neps/nep-0029-deprecation_policy.rst b/doc/neps/nep-0029-deprecation_policy.rst
index dbead1b9b..4674d24ec 100644
--- a/doc/neps/nep-0029-deprecation_policy.rst
+++ b/doc/neps/nep-0029-deprecation_policy.rst
@@ -111,8 +111,10 @@ Jun 23, 2020 3.7+   1.15+
 Jul 23, 2020 3.7+   1.16+
 Jan 13, 2021 3.7+   1.17+
 Jul 26, 2021 3.7+   1.18+
-Dec 26, 2021 3.8+   1.18+
-Apr 14, 2023 3.9+   1.18+
+Dec 22, 2021 3.7+   1.19+
+Dec 26, 2021 3.8+   1.19+
+Jun 21, 2022 3.8+   1.20+
+Apr 14, 2023 3.9+   1.20+
 ============ ====== =====
 
 
@@ -127,7 +129,9 @@ Drop Schedule
   On Jul 23, 2020 drop support for Numpy 1.15 (initially released on Jul 23, 2018)
   On Jan 13, 2021 drop support for Numpy 1.16 (initially released on Jan 13, 2019)
   On Jul 26, 2021 drop support for Numpy 1.17 (initially released on Jul 26, 2019)
+  On Dec 22, 2021 drop support for Numpy 1.18 (initially released on Dec 22, 2019)
   On Dec 26, 2021 drop support for Python 3.7 (initially released on Jun 27, 2018)
+  On Jun 21, 2022 drop support for Numpy 1.19 (initially released on Jun 20, 2020)
   On Apr 14, 2023 drop support for Python 3.8 (initially released on Oct 14, 2019)
 
 
@@ -255,6 +259,8 @@ Code to generate support and drop schedule tables ::
   Jan 13, 2019: Numpy 1.16
   Jul 26, 2019: Numpy 1.17
   Oct 14, 2019: Python 3.8
+  Dec 22, 2019: Numpy 1.18
+  Jun 20, 2020: Numpy 1.19
   """
 
   releases = []
@@ -274,8 +280,12 @@ Code to generate support and drop schedule tables ::
 
   releases = sorted(releases, key=lambda x: x[0])
 
-  minpy = '3.9+'
-  minnum = '1.18+'
+
+  py_major,py_minor = sorted([int(x) for x in r[2].split('.')] for r in releases if r[1] == 'Python')[-1]
+  minpy = f"{py_major}.{py_minor+1}+"
+
+  num_major,num_minor = sorted([int(x) for x in r[2].split('.')] for r in releases if r[1] == 'Numpy')[-1]
+  minnum = f"{num_major}.{num_minor+1}+"
 
   toprint_drop_dates = ['']
   toprint_support_table = []
@@ -289,14 +299,14 @@ Code to generate support and drop schedule tables ::
           minnum = v+'+'
       else:
           minpy = v+'+'
-
-  for e in toprint_drop_dates[::-1]:
+  print("On next release, drop support for Python 3.5 (initially released on Sep 13, 2015)")
+  for e in toprint_drop_dates[-4::-1]:
       print(e)
 
   print('============ ====== =====')
   print('Date         Python NumPy')
   print('------------ ------ -----')
-  for e in toprint_support_table[::-1]:
+  for e in toprint_support_table[-4::-1]:
       print(e)
   print('============ ====== =====')
 
diff --git a/doc/release/upcoming_changes/16650.compatibility.rst b/doc/release/upcoming_changes/16650.compatibility.rst
new file mode 100644
index 000000000..653232355
--- /dev/null
+++ b/doc/release/upcoming_changes/16650.compatibility.rst
@@ -0,0 +1,16 @@
+`numpy.genfromtxt` now correctly unpacks structured arrays
+----------------------------------------------------------
+Previously, `numpy.genfromtxt` failed to unpack if it was called with
+``unpack=True`` and a structured datatype was passed to the ``dtype`` argument
+(or ``dtype=None`` was passed and a structured datatype was inferred).
+For example::
+
+    >>> data = StringIO("21 58.0\n35 72.0")
+    >>> np.genfromtxt(data, dtype=None, unpack=True)
+    array([(21, 58.), (35, 72.)], dtype=[('f0', '<i8'), ('f1', '<f8')])
+
+Structured arrays will now correctly unpack into a list of arrays,
+one for each column::
+
+    >>> np.genfromtxt(data, dtype=None, unpack=True)
+    [array([21, 35]), array([58., 72.])]
diff --git a/doc/release/upcoming_changes/17219.new_feature.rst b/doc/release/upcoming_changes/17219.new_feature.rst
new file mode 100644
index 000000000..a6985ef0d
--- /dev/null
+++ b/doc/release/upcoming_changes/17219.new_feature.rst
@@ -0,0 +1,12 @@
+Negation of user-defined BLAS/LAPACK detection order
+----------------------------------------------------
+`distutils` allows negation of libraries when determining BLAS/LAPACK
+libraries.
+This may be used to remove an item from the library resolution phase, e.g.
+to disallow NetLIB libraries one could do:
+
+.. code:: bash
+
+    NPY_BLAS_ORDER='^blas' NPY_LAPACK_ORDER='^lapack' python setup.py build
+
+which will use any of the accelerated libraries instead.
diff --git a/doc/source/conf.py b/doc/source/conf.py
index e34be7f5c..fe7ea0967 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -367,15 +367,15 @@ def linkcode_resolve(domain, info):
            numpy.__version__, fn, linespec)
 
 from pygments.lexers import CLexer
-import copy
+from pygments.lexer import inherit, bygroups
+from pygments.token import Comment
 
 class NumPyLexer(CLexer):
     name = 'NUMPYLEXER'
 
-    tokens = copy.deepcopy(CLexer.tokens)
-    # Extend the regex for valid identifiers with @
-    for k, val in tokens.items():
-        for i, v in enumerate(val):
-            if isinstance(v, tuple):
-                if isinstance(v[0], str):
-                    val[i] =  (v[0].replace('a-zA-Z', 'a-zA-Z@'),) + v[1:]
+    tokens = {
+        'statements': [
+            (r'@[a-zA-Z_]*@', Comment.Preproc, 'macro'),
+            inherit,
+        ],
+    }
diff --git a/doc/source/docs/howto_document.rst b/doc/source/docs/howto_document.rst
index cf86b7e99..9f9068ab3 100644
--- a/doc/source/docs/howto_document.rst
+++ b/doc/source/docs/howto_document.rst
@@ -40,29 +40,7 @@ after which you may use it::
 
   np.fft.fft2(...)
 
-.. rubric::
-    **For convenience the** `formatting standard`_ **is included below with an
-    example**
-
-.. include:: ../../sphinxext/doc/format.rst
-
-.. _example:
-
-Example Source
-==============
-
-.. literalinclude:: ../../sphinxext/doc/example.py
-
-
-
-Example Rendered
-================
-
-.. ifconfig:: python_version_major < '3'
-
-    The example is rendered only when sphinx is run with python3 and above
-
-.. automodule:: doc.example
-    :members:
+Please use the numpydoc `formatting standard`_ as shown in their example_
 
 .. _`formatting standard`: https://numpydoc.readthedocs.io/en/latest/format.html
+.. _example: https://numpydoc.readthedocs.io/en/latest/example.html
diff --git a/doc/source/glossary.rst b/doc/source/glossary.rst
index d37534960..4a59c990b 100644
--- a/doc/source/glossary.rst
+++ b/doc/source/glossary.rst
@@ -2,10 +2,98 @@
 Glossary
 ********
 
-.. toctree::
-
 .. glossary::
 
+
+   (`n`,)
+       A tuple with one element. The trailing comma distinguishes a one-element
+       tuple from a parenthesized ``n``.
+
+
+   -1
+       Used as a dimension entry, ``-1`` instructs NumPy to choose the length
+       that will keep the total number of elements the same.
+
+
+   ``...``
+       An :py:data:`Ellipsis`.
+
+       **When indexing an array**, shorthand indicating that the missing axes,
+       if they exist, are full slices.
+
+           >>> a = np.arange(24).reshape(2,3,4)
+
+           >>> a[...].shape
+           (2, 3, 4)
+
+           >>> a[...,0].shape
+           (2, 3)
+
+           >>> a[0,...].shape
+           (3, 4)
+
+           >>> a[0,...,0].shape
+           (3,)
+
+       It can be used at most once; ``a[...,0,...]`` raises an :exc:`IndexError`.
+
+       **In printouts**, NumPy substitutes ``...`` for the middle elements of
+       large arrays. To see the entire array, use `numpy.printoptions`.
+
+
+   ``:``
+       The Python :term:`python:slice`
+       operator. In ndarrays, slicing can be applied to every
+       axis:
+
+           >>> a = np.arange(24).reshape(2,3,4)
+           >>> a
+           array([[[ 0,  1,  2,  3],
+                   [ 4,  5,  6,  7],
+                   [ 8,  9, 10, 11]],
+           <BLANKLINE>
+                  [[12, 13, 14, 15],
+                   [16, 17, 18, 19],
+                   [20, 21, 22, 23]]])
+           <BLANKLINE>
+           >>> a[1:,-2:,:-1]
+           array([[[16, 17, 18],
+                   [20, 21, 22]]])
+
+       Trailing slices can be omitted: ::
+
+           >>> a[1] == a[1,:,:]
+           array([[ True,  True,  True,  True],
+                  [ True,  True,  True,  True],
+                  [ True,  True,  True,  True]])
+
+       In contrast to Python, where slicing creates a copy, in NumPy slicing
+       creates a :term:`view`.
+
+       For details, see :ref:`combining-advanced-and-basic-indexing`.
+
+
+   ``<``
+       In a dtype declaration, indicates that the data is
+       :term:`little-endian` (the bracket is big on the right). ::
+
+           >>> dt = np.dtype('<f')  # little-endian single-precision float
+
+
+   ``>``
+       In a dtype declaration, indicates that the data is
+       :term:`big-endian` (the bracket is big on the left). ::
+
+           >>> dt = np.dtype('>H')  # big-endian unsigned short
+
+
+   advanced indexing
+       Rather than using a :doc:`scalar <reference/arrays.scalars>` or slice as
+       an index, an axis can be indexed with an array, providing fine-grained
+       selection. This is known as :ref:`advanced indexing<advanced-indexing>`
+       or "fancy indexing".
+
+
    along an axis
        Axes are defined for arrays with more than one dimension.  A
        2-dimensional array has two corresponding axes: the first running
@@ -26,6 +114,7 @@ Glossary
          >>> x.sum(axis=1)
          array([ 6, 22, 38])
 
+
    array
        A homogeneous container of numerical elements.  Each element in the
        array occupies a fixed amount of memory (hence homogeneous), and
@@ -50,19 +139,92 @@ Glossary
 
        Fast element-wise operations, called a :term:`ufunc`, operate on arrays.
 
+
    array_like
        Any sequence that can be interpreted as an ndarray.  This includes
        nested lists, tuples, scalars and existing arrays.
 
+
+   array scalar
+       For uniformity in handling operands, NumPy treats
+       a :doc:`scalar <reference/arrays.scalars>` as an array of zero
+       dimension.
+
+
+   axis
+
+       Another term for an array dimension. Axes are numbered left to right;
+       axis 0 is the first element in the shape tuple.
+
+       In a two-dimensional vector, the elements of axis 0 are rows and the
+       elements of axis 1 are columns.
+
+       In higher dimensions, the picture changes. NumPy prints
+       higher-dimensional vectors as replications of row-by-column building
+       blocks, as in this three-dimensional vector:
+
+           >>> a = np.arange(12).reshape(2,2,3)
+           >>> a
+           array([[[ 0,  1,  2],
+                   [ 3,  4,  5]],
+           <BLANKLINE>
+                  [[ 6,  7,  8],
+                   [ 9, 10, 11]]])
+
+       ``a`` is depicted as a two-element array whose elements are 2x3 vectors.
+       From this point of view, rows and columns are the final two axes,
+       respectively, in any shape.
+
+       This rule helps you anticipate how a vector will be printed, and
+       conversely how to find the index of any of the printed elements. For
+       instance, in the example, the last two values of 8's index must be 0 and
+       2. Since 8 appears in the second of the two 2x3's, the first index must
+       be 1:
+
+           >>> a[1,0,2]
+           8
+
+       A convenient way to count dimensions in a printed vector is to
+       count ``[`` symbols after the open-parenthesis. This is
+       useful in distinguishing, say, a (1,2,3) shape from a (2,3) shape:
+
+           >>> a = np.arange(6).reshape(2,3)
+           >>> a.ndim
+           2
+           >>> a
+           array([[0, 1, 2],
+                  [3, 4, 5]])
+
+           >>> a = np.arange(6).reshape(1,2,3)
+           >>> a.ndim
+           3
+           >>> a
+           array([[[0, 1, 2],
+                   [3, 4, 5]]])
+
+
+   .base
+
+       If an array does not own its memory, then its
+       :doc:`base <reference/generated/numpy.ndarray.base>` attribute
+       returns the object whose memory the array is referencing. That object
+       may be borrowing the memory from still another object, so the
+       owning object may be ``a.base.base.base...``. Despite advice to the
+       contrary, testing ``base`` is not a surefire way to determine if two
+       arrays are :term:`view`\ s.
+
+
    big-endian
        When storing a multi-byte value in memory as a sequence of bytes, the
        sequence addresses/sends/stores the most significant byte first (lowest
        address) and the least significant byte last (highest address). Common in
        micro-processors and used for transmission of data over network protocols.
 
+
    BLAS
        `Basic Linear Algebra Subprograms <https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms>`_
 
+
    broadcast
        NumPy can do operations on arrays whose shapes are mismatched::
 
@@ -82,9 +244,11 @@ Glossary
 
        See `basics.broadcasting` for more information.
 
+
    C order
        See `row-major`
 
+
    column-major
        A way to represent items in a N-dimensional array in the 1-dimensional
        computer memory. In column-major order, the leftmost index "varies the
@@ -100,6 +264,11 @@ Glossary
        Column-major order is also known as the Fortran order, as the Fortran
        programming language uses it.
 
+   copy
+
+       See :term:`view`.
+
+
    decorator
        An operator that transforms a function.  For example, a ``log``
        decorator may be defined to print debugging information upon
@@ -124,6 +293,7 @@ Glossary
        Logging call with parameters: (1, 2) {}
        3
 
+
    dictionary
        Resembling a language dictionary, which provides a mapping between
        words and descriptions thereof, a Python dictionary is a mapping
@@ -149,48 +319,55 @@ Glossary
        For more information on dictionaries, read the
        `Python tutorial <https://docs.python.org/tutorial/>`_.
 
+
+   dimension
+
+       See :term:`axis`.
+
+
+   dtype
+
+       The datatype describing the (identically typed) elements in an ndarray.
+       It can be changed to reinterpret the array contents. For details, see
+       :doc:`Data type objects (dtype) <reference/arrays.dtypes>`.
+
+
+   fancy indexing
+
+       Another term for :term:`advanced indexing`.
+
+
    field
        In a :term:`structured data type`, each sub-type is called a `field`.
        The `field` has a name (a string), a type (any valid dtype), and
        an optional `title`. See :ref:`arrays.dtypes`
 
+
    Fortran order
        See `column-major`
 
+
    flattened
        Collapsed to a one-dimensional array. See `numpy.ndarray.flatten`
        for details.
 
+
    homogeneous
-       Describes a block of memory comprised of blocks, each block comprised of 
+       Describes a block of memory comprised of blocks, each block comprised of
        items and of the same size, and blocks are interpreted in exactly the
        same way. In the simplest case each block contains a single item, for
        instance int32 or float64.
 
+
    immutable
        An object that cannot be modified after execution is called
        immutable.  Two common examples are strings and tuples.
 
-   iterable
-       A sequence that allows "walking" (iterating) over items, typically
-       using a loop such as::
-
-         >>> x = [1, 2, 3]
-         >>> [item**2 for item in x]
-         [1, 4, 9]
-
-       It is often used in combination with ``enumerate``::
-         >>> keys = ['a','b','c']
-         >>> for n, k in enumerate(keys):
-         ...     print("Key %d: %s" % (n, k))
-         ...
-         Key 0: a
-         Key 1: b
-         Key 2: c
 
    itemsize
        The size of the dtype element in bytes.
 
+
    list
        A Python container that can hold any number of objects or items.
        The items do not have to be of the same type, and can even be
@@ -223,12 +400,14 @@ Glossary
        tutorial <https://docs.python.org/tutorial/>`_.  For a mapping
        type (key-value), see *dictionary*.
 
+
    little-endian
        When storing a multi-byte value in memory as a sequence of bytes, the
        sequence addresses/sends/stores the least significant byte first (lowest
        address) and the most significant byte last (highest address). Common in
        x86 processors.
 
+
    mask
        A boolean array, used to select only certain elements for an operation::
 
@@ -244,6 +423,7 @@ Glossary
          >>> x
          array([ 0,  1,  2,  -1, -1])
 
+
    masked array
        Array that suppressed values indicated by a mask::
 
@@ -262,6 +442,7 @@ Glossary
        Masked arrays are often used when operating on arrays containing
        missing or invalid entries.
 
+
    matrix
        A 2-dimensional ndarray that preserves its two-dimensional nature
        throughout operations.  It has certain special operations, such as ``*``
@@ -276,18 +457,40 @@ Glossary
          matrix([[ 7, 10],
                [15, 22]])
 
+
    ndarray
        See *array*.
 
+
+   object array
+
+       An array whose dtype is ``object``; that is, it contains references to
+       Python objects. Indexing the array dereferences the Python objects, so
+       unlike other ndarrays, an object array has the ability to hold
+       heterogeneous objects.
+
+
+   ravel
+
+       `numpy.ravel` and `numpy.ndarray.flatten` both flatten an ndarray. ``ravel``
+       will return a view if possible; ``flatten`` always returns a copy.
+
+       Flattening collapses a multi-dimensional array to a single dimension;
+       details of how this is done (for instance, whether ``a[n+1]`` should be
+       the next row or next column) are parameters.
+
+
    record array
        An :term:`ndarray` with :term:`structured data type` which has been
        subclassed as ``np.recarray`` and whose dtype is of type ``np.record``,
        making the fields of its data type to be accessible by attribute.
 
+
    reference
        If ``a`` is a reference to ``b``, then ``(a is b) == True``.  Therefore,
        ``a`` and ``b`` are different names for the same Python object.
 
+
    row-major
        A way to represent items in a N-dimensional array in the 1-dimensional
        computer memory. In row-major order, the rightmost index "varies
@@ -303,6 +506,7 @@ Glossary
        Row-major order is also known as the C order, as the C programming
        language uses it. New NumPy arrays are by default in row-major order.
 
+
    slice
        Used to select only certain elements from a sequence:
 
@@ -330,9 +534,40 @@ Glossary
        >>> x[:, 1]
        array([2, 4])
 
+
+   stride
+
+       Physical memory is one-dimensional; strides provide a mechanism to map
+       a given index to an address in memory. For an N-dimensional array, its
+       ``strides`` attribute is an N-element tuple; advancing from index
+       ``i`` to index ``i+1`` on axis ``n`` means adding ``a.strides[n]`` bytes
+       to the address.
+
+       Strides are computed automatically from an array's dtype and
+       shape, but can be directly specified using
+       :doc:`as_strided <reference/generated/numpy.lib.stride_tricks.as_strided>`.
+
+       For details, see
+       :doc:`numpy.ndarray.strides <reference/generated/numpy.ndarray.strides>`.
+
+       To see how striding underlies the power of NumPy views, see
+       `The NumPy array: a structure for efficient numerical computation. \
+       <https://arxiv.org/pdf/1102.1523.pdf>`_
+
+
+   structure
+       See :term:`structured data type`
+
+
+   structured array
+
+       Array whose :term:`dtype` is a :term:`structured data type`.
+
+
    structured data type
        A data type composed of other datatypes
 
+
    subarray data type
        A :term:`structured data type` may contain a :term:`ndarray` with its
        own dtype and shape:
@@ -342,16 +577,19 @@ Glossary
        array([(0, [0., 0., 0.]), (0, [0., 0., 0.]), (0, [0., 0., 0.])],
              dtype=[('a', '<i4'), ('b', '<f4', (3,))])
 
+
    title
        In addition to field names, structured array fields may have an
        associated :ref:`title <titles>` which is an alias to the name and is
        commonly used for plotting.
 
+
    ufunc
        Universal function.  A fast element-wise, :term:`vectorized
        <vectorization>` array operation.  Examples include ``add``, ``sin`` and
        ``logical_or``.
 
+
    vectorization
        Optimizing a looping block by specialized code. In a traditional sense,
        vectorization performs the same operation on multiple elements with
@@ -362,6 +600,7 @@ Glossary
        operations on multiple elements, typically achieving speedups by
        avoiding some of the overhead in looking up and converting the elements.
 
+
    view
        An array that does not own its data, but refers to another array's
        data instead.  For example, we may create a view that only shows
@@ -379,6 +618,7 @@ Glossary
          >>> y
          array([3, 2, 4])
 
+
    wrapper
        Python is a high-level (highly abstracted, or English-like) language.
        This abstraction comes at a price in execution speed, and sometimes
@@ -390,4 +630,3 @@ Glossary
        Examples include ctypes, SWIG and Cython (which wraps C and C++)
        and f2py (which wraps Fortran).
 
-
diff --git a/doc/source/reference/arrays.dtypes.rst b/doc/source/reference/arrays.dtypes.rst
index 575984707..301e26c55 100644
--- a/doc/source/reference/arrays.dtypes.rst
+++ b/doc/source/reference/arrays.dtypes.rst
@@ -152,14 +152,6 @@ Array-scalar types
        >>> dt = np.dtype(np.complex128) # 128-bit complex floating-point number
 
 Generic types
-    .. deprecated NumPy 1.19::
-
-        The use of generic types is deprecated. This is because it can be
-        unexpected in a context such as ``arr.astype(dtype=np.floating)``.
-        ``arr.astype(dtype=np.floating)`` which casts an array of ``float32``
-        to an array of ``float64``, even though ``float32`` is a subdtype of
-        ``np.floating``.
-
     The generic hierarchical type objects convert to corresponding
     type objects according to the associations:
 
@@ -172,6 +164,15 @@ Generic types
     :class:`generic`, :class:`flexible`                    :class:`void`
     =====================================================  ===============
 
+    .. deprecated:: 1.19
+
+        This conversion of generic scalar types is deprecated.
+        This is because it can be unexpected in a context such as
+        ``arr.astype(dtype=np.floating)``, which casts an array of ``float32``
+        to an array of ``float64``, even though ``float32`` is a subdtype of
+        ``np.floating``.
+
+
 Built-in Python types
     Several python types are equivalent to a corresponding
     array scalar when used to generate a :class:`dtype` object:
diff --git a/doc/source/reference/arrays.indexing.rst b/doc/source/reference/arrays.indexing.rst
index 3e600b7c4..180a79dae 100644
--- a/doc/source/reference/arrays.indexing.rst
+++ b/doc/source/reference/arrays.indexing.rst
@@ -198,6 +198,7 @@ concepts to remember include:
    create an axis of length one. :const:`newaxis` is an alias for
    'None', and 'None' can be used in place of this with the same result.
 
+.. _advanced-indexing:
 
 Advanced Indexing
 -----------------
@@ -304,6 +305,8 @@ understood with an example.
     most important thing to remember about indexing with multiple advanced
     indexes.
 
+.. _combining-advanced-and-basic-indexing:
+
 Combining advanced and basic indexing
 """""""""""""""""""""""""""""""""""""
 
diff --git a/doc/source/reference/arrays.scalars.rst b/doc/source/reference/arrays.scalars.rst
index f57a11724..46d2bb8fa 100644
--- a/doc/source/reference/arrays.scalars.rst
+++ b/doc/source/reference/arrays.scalars.rst
@@ -29,7 +29,7 @@ an array scalar object. Alternatively, what kind of array scalar is
 present can be determined using other members of the data type
 hierarchy. Thus, for example ``isinstance(val, np.complexfloating)``
 will return :py:data:`True` if *val* is a complex valued type, while
-:const:`isinstance(val, np.flexible)` will return true if *val* is one
+``isinstance(val, np.flexible)`` will return true if *val* is one
 of the flexible itemsize array types (:class:`string`,
 :class:`unicode`, :class:`void`).
 
@@ -65,19 +65,22 @@ Some of the scalar types are essentially equivalent to fundamental
 Python types and therefore inherit from them as well as from the
 generic array scalar type:
 
-====================  ================================
-Array scalar type     Related Python type
-====================  ================================
-:class:`int_`         :class:`IntType` (Python 2 only)
-:class:`float_`       :class:`FloatType`
-:class:`complex_`     :class:`ComplexType`
-:class:`bytes_`       :class:`BytesType`
-:class:`unicode_`     :class:`UnicodeType`
-====================  ================================
+====================  ===========================  =============
+Array scalar type     Related Python type          Inherits?
+====================  ===========================  =============
+:class:`int_`         :class:`int`                 Python 2 only
+:class:`float_`       :class:`float`               yes
+:class:`complex_`     :class:`complex`             yes
+:class:`bytes_`       :class:`bytes`               yes
+:class:`str_`         :class:`str`                 yes
+:class:`bool_`        :class:`bool`                no
+:class:`datetime64`   :class:`datetime.datetime`   no
+:class:`timedelta64`  :class:`datetime.timedelta`  no
+====================  ===========================  =============
 
 The :class:`bool_` data type is very similar to the Python
-:class:`BooleanType` but does not inherit from it because Python's
-:class:`BooleanType` does not allow itself to be inherited from, and
+:class:`bool` but does not inherit from it because Python's
+:class:`bool` does not allow itself to be inherited from, and
 on the C-level the size of the actual bool data is not the same as a
 Python Boolean scalar.
 
@@ -86,7 +89,7 @@ Python Boolean scalar.
    The :class:`bool_` type is not a subclass of the :class:`int_` type
    (the :class:`bool_` is not even a number type). This is different
    than Python's default implementation of :class:`bool` as a
-   sub-class of int.
+   sub-class of :class:`int`.
 
 .. warning::
 
@@ -113,11 +116,11 @@ Type                 Remarks                        Character code
 Integers:
 
 ===================  =============================  ===============
-:class:`byte`        compatible: C char             ``'b'``
-:class:`short`       compatible: C short            ``'h'``
-:class:`intc`        compatible: C int              ``'i'``
-:class:`int_`        compatible: Python int         ``'l'``
-:class:`longlong`    compatible: C long long        ``'q'``
+:class:`byte`        compatible: C ``char``         ``'b'``
+:class:`short`       compatible: C ``short``        ``'h'``
+:class:`intc`        compatible: C ``int``          ``'i'``
+:class:`int_`        compatible: C ``long``         ``'l'``
+:class:`longlong`    compatible: C ``long long``    ``'q'``
 :class:`intp`        large enough to fit a pointer  ``'p'``
 :class:`int8`        8 bits
 :class:`int16`       16 bits
@@ -127,18 +130,18 @@ Integers:
 
 Unsigned integers:
 
-===================  =============================  ===============
-:class:`ubyte`       compatible: C unsigned char    ``'B'``
-:class:`ushort`      compatible: C unsigned short   ``'H'``
-:class:`uintc`       compatible: C unsigned int     ``'I'``
-:class:`uint`        compatible: Python int         ``'L'``
-:class:`ulonglong`   compatible: C long long        ``'Q'``
-:class:`uintp`       large enough to fit a pointer  ``'P'``
+===================  ======================================  ===============
+:class:`ubyte`       compatible: C ``unsigned char``         ``'B'``
+:class:`ushort`      compatible: C ``unsigned short``        ``'H'``
+:class:`uintc`       compatible: C ``unsigned int``          ``'I'``
+:class:`uint`        compatible: C ``unsigned long``         ``'L'``
+:class:`ulonglong`   compatible: C ``unsigned long long``    ``'Q'``
+:class:`uintp`       large enough to fit a pointer           ``'P'``
 :class:`uint8`       8 bits
 :class:`uint16`      16 bits
 :class:`uint32`      32 bits
 :class:`uint64`      64 bits
-===================  =============================  ===============
+===================  ======================================  ===============
 
 Floating-point numbers:
 
diff --git a/doc/source/release.rst b/doc/source/release.rst
index f2bf654ff..1c77755fd 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -6,6 +6,7 @@ Release Notes
     :maxdepth: 3
 
     1.20.0 <release/1.20.0-notes>
+    1.19.2 <release/1.19.2-notes>
     1.19.1 <release/1.19.1-notes>
     1.19.0 <release/1.19.0-notes>
     1.18.4 <release/1.18.4-notes>
diff --git a/doc/source/release/1.19.2-notes.rst b/doc/source/release/1.19.2-notes.rst
new file mode 100644
index 000000000..1267d5eb1
--- /dev/null
+++ b/doc/source/release/1.19.2-notes.rst
@@ -0,0 +1,57 @@
+.. currentmodule:: numpy
+
+==========================
+NumPy 1.19.2 Release Notes
+==========================
+
+NumPy 1.19.2 fixes several bugs, prepares for the upcoming Cython 3.x release,
+and pins setuptools to keep distutils working while upstream modifications are
+ongoing. The aarch64 wheels are built with the latest manylinux2014 release
+that fixes the problem of differing page sizes used by different linux distros.
+
+This release supports Python 3.6-3.8. Cython >= 0.29.21 needs to be used when
+building with Python 3.9 for testing purposes.
+
+There is a known problem with Windows 10 version=2004 and OpenBLAS svd that we
+are trying to debug. If you are running that Windows version you should use a
+NumPy version that links to the MKL library; earlier Windows versions are fine.
+
+Improvements
+============
+
+Add NumPy declarations for Cython 3.0 and later
+-----------------------------------------------
+The pxd declarations for Cython 3.0 were improved to avoid using deprecated
+NumPy C-API features.  Extension modules built with Cython 3.0+ that use NumPy
+can now set the C macro ``NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION`` to avoid
+C compiler warnings about deprecated API usage.
+
+Contributors
+============
+
+A total of 8 people contributed to this release.  People with a "+" by their
+names contributed a patch for the first time.
+
+* Charles Harris
+* Matti Picus
+* Pauli Virtanen
+* Philippe Ombredanne +
+* Sebastian Berg
+* Stefan Behnel +
+* Stephan Loyd +
+* Zac Hatfield-Dodds
+
+Pull requests merged
+====================
+
+A total of 9 pull requests were merged for this release.
+
+* `#16959 <https://github.com/numpy/numpy/pull/16959>`__: TST: Change aarch64 to arm64 in travis.yml.
+* `#16998 <https://github.com/numpy/numpy/pull/16998>`__: MAINT: Configure hypothesis in ``np.test()`` for determinism,...
+* `#17000 <https://github.com/numpy/numpy/pull/17000>`__: BLD: pin setuptools < 49.2.0
+* `#17015 <https://github.com/numpy/numpy/pull/17015>`__: ENH: Add NumPy declarations to be used by Cython 3.0+
+* `#17125 <https://github.com/numpy/numpy/pull/17125>`__: BUG: Remove non-threadsafe sigint handling from fft calculation
+* `#17243 <https://github.com/numpy/numpy/pull/17243>`__: BUG: core: fix ilp64 blas dot/vdot/... for strides > int32 max
+* `#17244 <https://github.com/numpy/numpy/pull/17244>`__: DOC: Use SPDX license expressions with correct license
+* `#17245 <https://github.com/numpy/numpy/pull/17245>`__: DOC: Fix the link to the quick-start in the old API functions
+* `#17272 <https://github.com/numpy/numpy/pull/17272>`__: BUG: fix pickling of arrays larger than 2GiB
diff --git a/doc/source/user/building.rst b/doc/source/user/building.rst
index 54ece3da3..47399139e 100644
--- a/doc/source/user/building.rst
+++ b/doc/source/user/building.rst
@@ -142,6 +142,16 @@ will prefer to use ATLAS, then BLIS, then OpenBLAS and as a last resort MKL.
 If neither of these exists the build will fail (names are compared
 lower case).
 
+Alternatively one may use ``!`` or ``^`` to negate all items::
+
+        NPY_BLAS_ORDER='^blas,atlas' python setup.py build
+
+will allow using anything **but** NetLIB BLAS and ATLAS libraries; the order of the above
+list is retained.
+
+One cannot mix negation and positives, nor have multiple negations; such cases will
+raise an error.
+
 LAPACK
 ~~~~~~
 
@@ -165,6 +175,17 @@ will prefer to use ATLAS, then OpenBLAS and as a last resort MKL.
 If neither of these exists the build will fail (names are compared
 lower case).
 
+Alternatively one may use ``!`` or ``^`` to negate all items::
+
+        NPY_LAPACK_ORDER='^lapack' python setup.py build
+
+will allow using anything **but** the NetLIB LAPACK library; the order of the above
+list is retained.
+
+One cannot mix negation and positives, nor have multiple negations; such cases will
+raise an error.
+
+
 .. deprecated:: 1.20
   The native libraries on macOS, provided by Accelerate, are not fit for use
   in NumPy since they have bugs that cause wrong output under easily reproducible
diff --git a/doc/source/user/how-to-how-to.rst b/doc/source/user/how-to-how-to.rst
new file mode 100644
index 000000000..de8afc28a
--- /dev/null
+++ b/doc/source/user/how-to-how-to.rst
@@ -0,0 +1,118 @@
+.. _how-to-how-to:
+
+##############################################################################
+How to write a NumPy how-to
+##############################################################################
+
+How-tos get straight to the point -- they
+
+  - answer a focused question, or
+  - narrow a broad question into focused questions that the user can
+    choose among.
+
+******************************************************************************
+A stranger has asked for directions...
+******************************************************************************
+
+**"I need to refuel my car."**
+
+******************************************************************************
+Give a brief but explicit answer
+******************************************************************************
+
+  - `"Three kilometers/miles, take a right at Hayseed Road, it's on your left."`
+
+Add helpful details for newcomers ("Hayseed Road", even though it's the only
+turnoff at three km/mi). But not irrelevant ones:
+
+  - Don't also give directions from Route 7.
+  - Don't explain why the town has only one filling station.
+
+If there's related background (tutorial, explanation, reference, alternative
+approach), bring it to the user's attention with a link ("Directions from Route 7,"
+"Why so few filling stations?").
+
+
+******************************************************************************
+Delegate
+******************************************************************************
+
+  - `"Three km/mi, take a right at Hayseed Road, follow the signs."`
+
+If the information is already documented and succinct enough for a how-to,
+just link to it, possibly after an introduction ("Three km/mi, take a right").
+
+******************************************************************************
+If the question is broad, narrow and redirect it
+******************************************************************************
+
+ **"I want to see the sights."**
+
+The `See the sights` how-to should link to a set of narrower how-tos:
+
+- Find historic buildings
+- Find scenic lookouts
+- Find the town center
+
+and these might in turn link to still narrower how-tos -- so the town center
+page might link to
+
+   - Find the court house
+   - Find city hall
+
+By organizing how-tos this way, you not only display the options for people
+who need to narrow their question, you also have provided answers for users
+who start with narrower questions ("I want to see historic buildings," "Which
+way to city hall?").
+
+******************************************************************************
+If there are many steps, break them up
+******************************************************************************
+
+If a how-to has many steps:
+
+  - Consider breaking a step out into an individual how-to and linking to it.
+  - Include subheadings. They help readers grasp what's coming and return
+    where they left off.
+
+******************************************************************************
+Why write how-tos when there's Stack Overflow, Reddit, Gitter...?
+******************************************************************************
+
+ - We have authoritative answers.
+ - How-tos make the site less forbidding to non-experts.
+ - How-tos bring people into the site and help them discover other information
+   that's here.
+ - Creating how-tos helps us see NumPy usability through new eyes.
+
+******************************************************************************
+Aren't how-tos and tutorials the same thing?
+******************************************************************************
+
+People use the terms "how-to" and "tutorial" interchangeably, but we draw a
+distinction, following Daniele Procida's `taxonomy of documentation`_.
+
+ .. _`taxonomy of documentation`: https://documentation.divio.com/
+
+Documentation needs to meet users where they are.  `How-tos` offer get-it-done
+information; the user wants steps to copy and doesn't necessarily want to
+understand NumPy. `Tutorials` are warm-fuzzy information; the user wants a
+feel for some aspect of NumPy (and again, may or may not care about deeper
+knowledge).
+
+We distinguish both tutorials and how-tos from `Explanations`, which are
+deep dives intended to give understanding rather than immediate assistance,
+and `References`, which give complete, authoritative data on some concrete
+part of NumPy (like its API) but aren't obligated to paint a broader picture.
+
+For more on tutorials, see the `tutorial how-to`_.
+
+.. _`tutorial how-to`: https://github.com/numpy/numpy-tutorials/blob/master/tutorial_style.ipynb
+
+
+******************************************************************************
+Is this page an example of a how-to?
+******************************************************************************
+
+Yes -- until the sections with question-mark headings; they explain rather
+than giving directions. In a how-to, those would be links.
+\ No newline at end of file
diff --git a/doc/source/user/howtos_index.rst b/doc/source/user/howtos_index.rst
index c052286b9..45e013e6f 100644
--- a/doc/source/user/howtos_index.rst
+++ b/doc/source/user/howtos_index.rst
@@ -11,4 +11,5 @@ the package, see the :ref:`API reference <reference>`.
 .. toctree::
    :maxdepth: 1
 
+   how-to-how-to
    ionumpy
diff --git a/doc/source/user/numpy-for-matlab-users.rst b/doc/source/user/numpy-for-matlab-users.rst
index 547d5b2a0..eb011de63 100644
--- a/doc/source/user/numpy-for-matlab-users.rst
+++ b/doc/source/user/numpy-for-matlab-users.rst
@@ -7,12 +7,9 @@ NumPy for MATLAB users
 Introduction
 ============
 
-MATLAB® and NumPy/SciPy have a lot in common. But there are many
-differences. NumPy and SciPy were created to do numerical and scientific
-computing in the most natural way with Python, not to be MATLAB clones.
-This page is intended to be a place to collect wisdom about the
-differences, mostly for the purpose of helping proficient MATLAB users
-become proficient NumPy and SciPy users.
+MATLAB® and NumPy have a lot in common, but NumPy was created to work with
+Python, not to be a MATLAB clone.  This guide will help MATLAB users get started
+with NumPy. 
 
 .. raw:: html
 
@@ -20,234 +17,184 @@ become proficient NumPy and SciPy users.
    table.docutils td { border: solid 1px #ccc; }
    </style>
 
-Some Key Differences
+Some key differences
 ====================
 
 .. list-table::
-
-   * - In MATLAB, the basic data type is a multidimensional array of
-       double precision floating point numbers.  Most expressions take such
-       arrays and return such arrays.  Operations on the 2-D instances of
-       these arrays are designed to act more or less like matrix operations
-       in linear algebra.
-     - In NumPy the basic type is a multidimensional ``array``.  Operations
-       on these arrays in all dimensionalities including 2D are element-wise
-       operations.  One needs to use specific functions for linear algebra
-       (though for matrix multiplication, one can use the ``@`` operator
-       in python 3.5 and above).
-
-   * - MATLAB uses 1 (one) based indexing. The initial element of a
-       sequence is found using a(1).
+   :class: docutils
+
+   * - In MATLAB, the basic type, even for scalars, is a
+       multidimensional array. Array assignments in MATLAB are stored as
+       2D arrays of double precision floating point numbers, unless you
+       specify the number of dimensions and type.  Operations on the 2D
+       instances of these arrays are modeled on matrix operations in
+       linear algebra. 
+
+     - In NumPy, the basic type is a multidimensional ``array``.  Array
+       assignments in NumPy are usually stored as :ref:`n-dimensional arrays<arrays>` with the
+       minimum type required to hold the objects in sequence, unless you
+       specify the number of dimensions and type. NumPy performs
+       operations element-by-element, so multiplying 2D arrays with
+       ``*`` is not a matrix multiplication -- it's an
+       element-by-element multiplication. (The ``@`` operator, available
+       since Python 3.5, can be used for conventional matrix
+       multiplication.)
+
+   * - MATLAB numbers indices from 1; ``a(1)`` is the first element.
        :ref:`See note INDEXING <numpy-for-matlab-users.notes>`
-     - Python uses 0 (zero) based indexing. The initial element of a
-       sequence is found using a[0].
-
-   * - MATLAB's scripting language was created for doing linear algebra.
-       The syntax for basic matrix operations is nice and clean, but the API
-       for adding GUIs and making full-fledged applications is more or less
-       an afterthought.
-     - NumPy is  based on Python, which was designed from the outset to be
-       an excellent general-purpose programming language.  While MATLAB's
-       syntax for some array manipulations is more compact than
-       NumPy's, NumPy (by virtue of being an add-on to Python) can do many
-       things that MATLAB just cannot, for instance dealing properly with
-       stacks of matrices.
-
-   * - In MATLAB, arrays have pass-by-value semantics, with a lazy
-       copy-on-write scheme to prevent actually creating copies until they
-       are actually needed.  Slice operations copy parts of the array.
-     - In NumPy arrays have pass-by-reference semantics.  Slice operations
-       are views into an array.
-
-
-'array' or 'matrix'? Which should I use?
-========================================
-
-Historically, NumPy has provided a special matrix type, `np.matrix`, which
-is a subclass of ndarray which makes binary operations linear algebra
-operations. You may see it used in some existing code instead of `np.array`.
-So, which one to use?
-
-Short answer
-------------
-
-**Use arrays**.
-
--  They are the standard vector/matrix/tensor type of numpy. Many numpy
-   functions return arrays, not matrices.
--  There is a clear distinction between element-wise operations and
-   linear algebra operations.
--  You can have standard vectors or row/column vectors if you like.
-
-Until Python 3.5 the only disadvantage of using the array type was that you
-had to use ``dot`` instead of ``*`` to multiply (reduce) two tensors
-(scalar product, matrix vector multiplication etc.). Since Python 3.5 you
-can use the matrix multiplication ``@`` operator.
-
-Given the above, we intend to deprecate ``matrix`` eventually.
-
-Long answer
------------
-
-NumPy contains both an ``array`` class and a ``matrix`` class. The
-``array`` class is intended to be a general-purpose n-dimensional array
-for many kinds of numerical computing, while ``matrix`` is intended to
-facilitate linear algebra computations specifically. In practice there
-are only a handful of key differences between the two.
-
--  Operators ``*`` and ``@``, functions ``dot()``, and ``multiply()``:
-
-   -  For ``array``, **``*`` means element-wise multiplication**, while
-      **``@`` means matrix multiplication**; they have associated functions
-      ``multiply()`` and ``dot()``.  (Before python 3.5, ``@`` did not exist
-      and one had to use ``dot()`` for matrix multiplication).
-   -  For ``matrix``, **``*`` means matrix multiplication**, and for
-      element-wise multiplication one has to use the ``multiply()`` function.
-
--  Handling of vectors (one-dimensional arrays)
-
-   -  For ``array``, the **vector shapes 1xN, Nx1, and N are all different
-      things**. Operations like ``A[:,1]`` return a one-dimensional array of
-      shape N, not a two-dimensional array of shape Nx1. Transpose on a
-      one-dimensional ``array`` does nothing.
-   -  For ``matrix``, **one-dimensional arrays are always upconverted to 1xN
-      or Nx1 matrices** (row or column vectors). ``A[:,1]`` returns a
-      two-dimensional matrix of shape Nx1.
-
--  Handling of higher-dimensional arrays (ndim > 2)
-
-   -  ``array`` objects **can have number of dimensions > 2**;
-   -  ``matrix`` objects **always have exactly two dimensions**.
-
--  Convenience attributes
-
-   -  ``array`` **has a .T attribute**, which returns the transpose of
-      the data.
-   -  ``matrix`` **also has .H, .I, and .A attributes**, which return
-      the conjugate transpose, inverse, and ``asarray()`` of the matrix,
-      respectively.
-
--  Convenience constructor
+     - NumPy, like Python, numbers indices from 0; ``a[0]`` is the first
+       element.
 
-   -  The ``array`` constructor **takes (nested) Python sequences as
-      initializers**. As in, ``array([[1,2,3],[4,5,6]])``.
-   -  The ``matrix`` constructor additionally **takes a convenient
-      string initializer**. As in ``matrix("[1 2 3; 4 5 6]")``.
-
-There are pros and cons to using both:
-
--  ``array``
-
-   -  ``:)`` Element-wise multiplication is easy: ``A*B``.
-   -  ``:(`` You have to remember that matrix multiplication has its own
-      operator, ``@``.
-   -  ``:)`` You can treat one-dimensional arrays as *either* row or column
-      vectors. ``A @ v`` treats ``v`` as a column vector, while
-      ``v @ A`` treats ``v`` as a row vector. This can save you having to
-      type a lot of transposes.
-   -  ``:)`` ``array`` is the "default" NumPy type, so it gets the most
-      testing, and is the type most likely to be returned by 3rd party
-      code that uses NumPy.
-   -  ``:)`` Is quite at home handling data of any number of dimensions.
-   -  ``:)`` Closer in semantics to tensor algebra, if you are familiar
-      with that.
-   -  ``:)`` *All* operations (``*``, ``/``, ``+``, ``-`` etc.) are
-      element-wise.
-   -  ``:(`` Sparse matrices from ``scipy.sparse`` do not interact as well
-      with arrays.
-
--  ``matrix``
-
-   -  ``:\\`` Behavior is more like that of MATLAB matrices.
-   -  ``<:(`` Maximum of two-dimensional. To hold three-dimensional data you
-      need ``array`` or perhaps a Python list of ``matrix``.
-   -  ``<:(`` Minimum of two-dimensional. You cannot have vectors. They must be
-      cast as single-column or single-row matrices.
-   -  ``<:(`` Since ``array`` is the default in NumPy, some functions may
-      return an ``array`` even if you give them a ``matrix`` as an
-      argument. This shouldn't happen with NumPy functions (if it does
-      it's a bug), but 3rd party code based on NumPy may not honor type
-      preservation like NumPy does.
-   -  ``:)`` ``A*B`` is matrix multiplication, so it looks just like you write
-      it in linear algebra (For Python >= 3.5 plain arrays have the same
-      convenience with the ``@`` operator).
-   -  ``<:(`` Element-wise multiplication requires calling a function,
-      ``multiply(A,B)``.
-   -  ``<:(`` The use of operator overloading is a bit illogical: ``*``
-      does not work element-wise but ``/`` does.
-   -  Interaction with ``scipy.sparse`` is a bit cleaner.
-
-The ``array`` is thus much more advisable to use.  Indeed, we intend to
-deprecate ``matrix`` eventually.
-
-Table of Rough MATLAB-NumPy Equivalents
+   * - MATLAB's scripting language was created for linear algebra so the
+       syntax for some array manipulations is more compact than
+       NumPy's. On the other hand, the API for adding GUIs and creating 
+       full-fledged applications is more or less an afterthought.
+     - NumPy is based on Python, a
+       general-purpose language.  The advantage to NumPy
+       is access to Python libraries including: `SciPy
+       <https://www.scipy.org/>`_, `Matplotlib <https://matplotlib.org/>`_,
+       `Pandas <https://pandas.pydata.org/>`_, `OpenCV <https://opencv.org/>`_,
+       and more. In addition, Python is often `embedded as a scripting language
+       <https://en.wikipedia.org/wiki/List_of_Python_software#Embedded_as_a_scripting_language>`_
+       in other software, allowing NumPy to be used there too. 
+
+   * - MATLAB array slicing uses pass-by-value semantics, with a lazy
+       copy-on-write scheme to prevent creating copies until they are
+       needed. Slicing operations copy parts of the array.
+     - NumPy array slicing uses pass-by-reference, which does not copy
+       the arguments. Slicing operations are views into an array.
+   
+
+Rough equivalents
 =======================================
 
 The table below gives rough equivalents for some common MATLAB
-expressions. **These are not exact equivalents**, but rather should be
-taken as hints to get you going in the right direction. For more detail
-read the built-in documentation on the NumPy functions.
+expressions. These are similar expressions, not equivalents. For
+details, see the :ref:`documentation<reference>`.
 
 In the table below, it is assumed that you have executed the following
 commands in Python:
 
 ::
 
-    from numpy import *
-    import scipy.linalg
+    import numpy as np
+    from scipy import io, integrate, linalg, signal
+    from scipy.sparse.linalg import eigs
 
 Also assume below that if the Notes talk about "matrix" that the
 arguments are two-dimensional entities.
 
-General Purpose Equivalents
+General purpose equivalents
 ---------------------------
 
 .. list-table::
    :header-rows: 1
 
-   * - **MATLAB**
-     - **numpy**
-     - **Notes**
+   * - MATLAB
+     - NumPy
+     - Notes
 
    * - ``help func``
-     - ``info(func)`` or ``help(func)`` or ``func?`` (in Ipython)
+     - ``np.info(func)`` or ``help(func)`` or ``func?`` (in IPython)
      - get help on the function *func*
 
    * - ``which func``
-     - `see note HELP <numpy-for-matlab-users.notes>`__
+     - :ref:`see note HELP <numpy-for-matlab-users.notes>`
      - find out where *func* is defined
 
    * - ``type func``
-     - ``source(func)`` or ``func??`` (in Ipython)
+     - ``np.source(func)`` or ``func??`` (in IPython)
      - print source for *func* (if not a native function)
 
+   * - ``% comment``
+     - ``# comment``
+     - comment a line of code with the text ``comment``
+
+   * - ::
+
+         for i=1:3
+             fprintf('%i\n',i)
+         end
+
+     - ::
+
+         for i in range(1, 4):
+             print(i)
+
+     - use a for-loop to print the numbers 1, 2, and 3 using :py:class:`range <range>`
+
    * - ``a && b``
      - ``a and b``
-     - short-circuiting logical  AND operator (Python native operator);
+     - short-circuiting logical AND operator (:ref:`Python native operator <python:boolean>`);
        scalar arguments only
 
    * - ``a || b``
      - ``a or b``
-     - short-circuiting logical OR operator (Python native operator);
+     - short-circuiting logical OR operator (:ref:`Python native operator <python:boolean>`);
        scalar arguments only
 
+   * - .. code:: matlab
+        
+        >> 4 == 4
+        ans = 1
+        >> 4 == 5
+        ans = 0
+
+     - ::
+
+        >>> 4 == 4
+        True
+        >>> 4 == 5
+        False
+
+     - The :ref:`boolean objects <python:bltin-boolean-values>`
+       in Python are ``True`` and ``False``, as opposed to MATLAB
+       logical types of ``1`` and ``0``. 
+
+   * - .. code:: matlab
+
+         a=4
+         if a==4
+             fprintf('a = 4\n')
+         elseif a==5
+             fprintf('a = 5\n')
+         end
+
+     - ::
+
+         a = 4
+         if a == 4:
+             print('a = 4')
+         elif a == 5: 
+             print('a = 5')
+
+     - create an if-else statement to check if ``a`` is 4 or 5 and print result
+
    * - ``1*i``, ``1*j``,  ``1i``, ``1j``
      - ``1j``
      - complex numbers
 
    * - ``eps``
-     - ``np.spacing(1)``
-     - Distance between 1 and the nearest floating point number.
+     - ``np.finfo(float).eps`` or ``np.spacing(1)``
+     - Upper bound to relative error due to rounding in 64-bit floating point
+       arithmetic.
+
+   * - ``load data.mat``
+     - ``io.loadmat('data.mat')``
+     - Load MATLAB variables saved to the file ``data.mat``. (Note: When saving arrays to
+       ``data.mat`` in MATLAB/Octave, use a recent binary format. :func:`scipy.io.loadmat`
+       will create a dictionary with the saved arrays and further information.)
 
    * - ``ode45``
-     - ``scipy.integrate.solve_ivp(f)``
+     - ``integrate.solve_ivp(f)``
      - integrate an ODE with Runge-Kutta 4,5
 
    * - ``ode15s``
-     - ``scipy.integrate.solve_ivp(f, method='BDF')``
+     - ``integrate.solve_ivp(f, method='BDF')``
      - integrate an ODE with BDF method
 
-Linear Algebra Equivalents
+
+Linear algebra equivalents
 --------------------------
 
 .. list-table::
@@ -258,16 +205,16 @@ Linear Algebra Equivalents
      - Notes
 
    * - ``ndims(a)``
-     - ``ndim(a)`` or ``a.ndim``
-     - get the number of dimensions of an array
+     - ``np.ndim(a)`` or ``a.ndim``
+     - number of dimensions of array ``a``
 
    * - ``numel(a)``
-     - ``size(a)`` or ``a.size``
-     - get the number of elements of an array
+     - ``np.size(a)`` or ``a.size``
+     - number of elements of array ``a``
 
    * - ``size(a)``
-     - ``shape(a)`` or ``a.shape``
-     - get the "size" of the matrix
+     - ``np.shape(a)`` or ``a.shape``
+     - "size" of array ``a``
 
    * - ``size(a,n)``
      - ``a.shape[n-1]``
@@ -276,45 +223,45 @@ Linear Algebra Equivalents
        See note :ref:`INDEXING <numpy-for-matlab-users.notes>`)
 
    * - ``[ 1 2 3; 4 5 6 ]``
-     - ``array([[1.,2.,3.], [4.,5.,6.]])``
-     - 2x3 matrix literal
+     - ``np.array([[1., 2., 3.], [4., 5., 6.]])``
+     - define a 2x3 2D array
 
    * - ``[ a b; c d ]``
-     - ``block([[a,b], [c,d]])``
+     - ``np.block([[a, b], [c, d]])``
      - construct a matrix from blocks ``a``, ``b``, ``c``, and ``d``
 
    * - ``a(end)``
      - ``a[-1]``
-     - access last element in the 1xn matrix ``a``
+     - access last element in MATLAB vector (1xn or nx1) or 1D NumPy array
+       ``a`` (length n)
 
    * - ``a(2,5)``
-     - ``a[1,4]``
-     - access element in second row, fifth column
+     - ``a[1, 4]``
+     - access element in second row, fifth column in 2D array ``a``
 
    * - ``a(2,:)``
-     - ``a[1]`` or  ``a[1,:]``
-     - entire second row of ``a``
+     - ``a[1]`` or  ``a[1, :]``
+     - entire second row of 2D array ``a``
 
    * - ``a(1:5,:)``
-     - ``a[0:5]`` or ``a[:5]`` or ``a[0:5,:]``
-     - the first five rows of ``a``
+     - ``a[0:5]`` or ``a[:5]`` or ``a[0:5, :]``
+     - first 5 rows of 2D array ``a``
 
    * - ``a(end-4:end,:)``
      - ``a[-5:]``
-     - the last five rows of ``a``
+     - last 5 rows of 2D array ``a``
 
    * - ``a(1:3,5:9)``
-     - ``a[0:3][:,4:9]``
-     - rows one to three and columns five to nine of ``a``.  This gives
-       read-only access.
+     - ``a[0:3, 4:9]``
+     - The first through third rows and fifth through ninth columns of a 2D array, ``a``. 
 
    * - ``a([2,4,5],[1,3])``
-     - ``a[ix_([1,3,4],[0,2])]``
+     - ``a[np.ix_([1, 3, 4], [0, 2])]``
      - rows 2,4 and 5 and columns 1 and 3.  This allows the matrix to be
        modified, and doesn't require a regular slice.
 
    * - ``a(3:2:21,:)``
-     - ``a[ 2:21:2,:]``
+     - ``a[2:21:2,:]``
      - every other row of ``a``, starting with the third and going to the
        twenty-first
 
@@ -323,11 +270,11 @@ Linear Algebra Equivalents
      - every other row of ``a``, starting with the first
 
    * - ``a(end:-1:1,:)``  or ``flipud(a)``
-     -  ``a[ ::-1,:]``
+     -  ``a[::-1,:]``
      - ``a`` with rows in reverse order
 
    * - ``a([1:end 1],:)``
-     -  ``a[r_[:len(a),0]]``
+     -  ``a[np.r_[:len(a),0]]``
      - ``a`` with copy of the first row appended to the end
 
    * - ``a.'``
@@ -354,30 +301,30 @@ Linear Algebra Equivalents
      - ``a**3``
      - element-wise exponentiation
 
-   * - ``(a>0.5)``
-     - ``(a>0.5)``
+   * - ``(a > 0.5)``
+     - ``(a > 0.5)``
      - matrix whose i,jth element is (a_ij > 0.5).  The MATLAB result is an
-       array of 0s and 1s.  The NumPy result is an array of the boolean
+       array of logical values 0 and 1.  The NumPy result is an array of the boolean
        values ``False`` and ``True``.
 
-   * - ``find(a>0.5)``
-     - ``nonzero(a>0.5)``
+   * - ``find(a > 0.5)``
+     - ``np.nonzero(a > 0.5)``
      - find the indices where (``a`` > 0.5)
 
-   * - ``a(:,find(v>0.5))``
-     - ``a[:,nonzero(v>0.5)[0]]``
+   * - ``a(:,find(v > 0.5))``
+     - ``a[:,np.nonzero(v > 0.5)[0]]``
      - extract the columms of ``a`` where vector v > 0.5
 
    * - ``a(:,find(v>0.5))``
-     - ``a[:,v.T>0.5]``
+     - ``a[:, v.T > 0.5]``
      - extract the columms of ``a`` where column vector v > 0.5
 
    * - ``a(a<0.5)=0``
-     - ``a[a<0.5]=0``
+     - ``a[a < 0.5]=0``
      - ``a`` with elements less than 0.5 zeroed out
 
    * - ``a .* (a>0.5)``
-     - ``a * (a>0.5)``
+     - ``a * (a > 0.5)``
      - ``a`` with elements less than 0.5 zeroed out
 
    * - ``a(:) = 3``
@@ -386,11 +333,11 @@ Linear Algebra Equivalents
 
    * - ``y=x``
      - ``y = x.copy()``
-     - numpy assigns by reference
+     - NumPy assigns by reference
 
    * - ``y=x(2,:)``
-     - ``y = x[1,:].copy()``
-     - numpy slices are by reference
+     - ``y = x[1, :].copy()``
+     - NumPy slices are by reference
 
    * - ``y=x(:)``
      - ``y = x.flatten()``
@@ -398,62 +345,74 @@ Linear Algebra Equivalents
        same data ordering as in MATLAB, use ``x.flatten('F')``.
 
    * - ``1:10``
-     - ``arange(1.,11.)`` or ``r_[1.:11.]`` or  ``r_[1:10:10j]``
+     - ``np.arange(1., 11.)`` or ``np.r_[1.:11.]`` or  ``np.r_[1:10:10j]``
      - create an increasing vector (see note :ref:`RANGES
        <numpy-for-matlab-users.notes>`)
 
    * - ``0:9``
-     - ``arange(10.)`` or  ``r_[:10.]`` or  ``r_[:9:10j]``
+     - ``np.arange(10.)`` or  ``np.r_[:10.]`` or  ``np.r_[:9:10j]``
      - create an increasing vector (see note :ref:`RANGES
        <numpy-for-matlab-users.notes>`)
 
    * - ``[1:10]'``
-     - ``arange(1.,11.)[:, newaxis]``
+     - ``np.arange(1.,11.)[:, np.newaxis]``
      - create a column vector
 
    * - ``zeros(3,4)``
-     - ``zeros((3,4))``
+     - ``np.zeros((3, 4))``
      - 3x4 two-dimensional array full of 64-bit floating point zeros
 
    * - ``zeros(3,4,5)``
-     - ``zeros((3,4,5))``
+     - ``np.zeros((3, 4, 5))``
      - 3x4x5 three-dimensional array full of 64-bit floating point zeros
 
    * - ``ones(3,4)``
-     - ``ones((3,4))``
+     - ``np.ones((3, 4))``
      - 3x4 two-dimensional array full of 64-bit floating point ones
 
    * - ``eye(3)``
-     - ``eye(3)``
+     - ``np.eye(3)``
      - 3x3 identity matrix
 
    * - ``diag(a)``
-     - ``diag(a)``
-     - vector of diagonal elements of ``a``
+     - ``np.diag(a)``
+     - returns a vector of the diagonal elements of 2D array, ``a``
+
+   * - ``diag(v,0)``
+     - ``np.diag(v, 0)``
+     - returns a square diagonal matrix whose nonzero values are the elements of
+       vector, ``v``
 
-   * - ``diag(a,0)``
-     - ``diag(a,0)``
-     - square diagonal matrix whose nonzero values are the elements of
-       ``a``
+   * - .. code:: matlab
+         
+         rng(42,'twister')
+         rand(3,4)
 
-   * - ``rand(3,4)``
-     - ``random.rand(3,4)`` or ``random.random_sample((3, 4))``
-     - random 3x4 matrix
+     - ::
+
+         from numpy.random import default_rng
+         rng = default_rng(42)
+         rng.random((3, 4))
+
+       or older version: ``random.rand(3, 4)``
+
+     - generate a random 3x4 array with default random number generator and
+       seed = 42
 
    * - ``linspace(1,3,4)``
-     - ``linspace(1,3,4)``
+     - ``np.linspace(1,3,4)``
      - 4 equally spaced samples between 1 and 3, inclusive
 
    * - ``[x,y]=meshgrid(0:8,0:5)``
-     - ``mgrid[0:9.,0:6.]`` or ``meshgrid(r_[0:9.],r_[0:6.]``
+     - ``np.mgrid[0:9.,0:6.]`` or ``np.meshgrid(np.r_[0:9.],np.r_[0:6.])``
      - two 2D arrays: one of x values, the other of y values
 
    * -
-     - ``ogrid[0:9.,0:6.]`` or ``ix_(r_[0:9.],r_[0:6.]``
+     - ``np.ogrid[0:9.,0:6.]`` or ``np.ix_(np.r_[0:9.],np.r_[0:6.])``
      - the best way to eval functions on a grid
 
    * - ``[x,y]=meshgrid([1,2,4],[2,4,5])``
-     - ``meshgrid([1,2,4],[2,4,5])``
+     - ``np.meshgrid([1,2,4],[2,4,5])``
      -
 
    * -
@@ -461,37 +420,38 @@ Linear Algebra Equivalents
      - the best way to eval functions on a grid
 
    * - ``repmat(a, m, n)``
-     - ``tile(a, (m, n))``
+     - ``np.tile(a, (m, n))``
      - create m by n copies of ``a``
 
    * - ``[a b]``
-     - ``concatenate((a,b),1)`` or ``hstack((a,b))`` or
-       ``column_stack((a,b))`` or ``c_[a,b]``
+     - ``np.concatenate((a,b),1)`` or ``np.hstack((a,b))`` or
+       ``np.column_stack((a,b))`` or ``np.c_[a,b]``
      - concatenate columns of ``a`` and ``b``
 
    * - ``[a; b]``
-     - ``concatenate((a,b))`` or ``vstack((a,b))`` or ``r_[a,b]``
+     - ``np.concatenate((a,b))`` or ``np.vstack((a,b))`` or ``np.r_[a,b]``
      - concatenate rows of ``a`` and ``b``
 
    * - ``max(max(a))``
-     - ``a.max()``
-     - maximum element of ``a`` (with ndims(a)<=2 for MATLAB)
+     - ``a.max()`` or ``np.nanmax(a)``
+     - maximum element of ``a`` (with ndims(a)<=2 for MATLAB, if there are
+       NaN's, ``nanmax`` will ignore these and return largest value)
 
    * - ``max(a)``
      - ``a.max(0)``
-     - maximum element of each column of matrix ``a``
+     - maximum element of each column of array ``a``
 
    * - ``max(a,[],2)``
      - ``a.max(1)``
-     - maximum element of each row of matrix ``a``
+     - maximum element of each row of array ``a``
 
    * - ``max(a,b)``
-     - ``maximum(a, b)``
+     - ``np.maximum(a, b)``
      - compares ``a`` and ``b`` element-wise, and returns the maximum value
        from each pair
 
    * - ``norm(v)``
-     - ``sqrt(v @ v)`` or ``np.linalg.norm(v)``
+     - ``np.sqrt(v @ v)`` or ``np.linalg.norm(v)``
      - L2 norm of vector ``v``
 
    * - ``a & b``
@@ -500,7 +460,7 @@ Linear Algebra Equivalents
        LOGICOPS <numpy-for-matlab-users.notes>`
 
    * - ``a | b``
-     - ``logical_or(a,b)``
+     - ``np.logical_or(a,b)``
      - element-by-element OR operator (NumPy ufunc) :ref:`See note LOGICOPS
        <numpy-for-matlab-users.notes>`
 
@@ -514,90 +474,99 @@ Linear Algebra Equivalents
 
    * - ``inv(a)``
      - ``linalg.inv(a)``
-     - inverse of square matrix ``a``
+     - inverse of square 2D array ``a``
 
    * - ``pinv(a)``
      - ``linalg.pinv(a)``
-     - pseudo-inverse of matrix ``a``
+     - pseudo-inverse of 2D array ``a``
 
    * - ``rank(a)``
      - ``linalg.matrix_rank(a)``
-     - matrix rank of a 2D array / matrix ``a``
+     - matrix rank of a 2D array ``a``
 
    * - ``a\b``
-     - ``linalg.solve(a,b)`` if ``a`` is square; ``linalg.lstsq(a,b)``
+     - ``linalg.solve(a, b)`` if ``a`` is square; ``linalg.lstsq(a, b)``
        otherwise
      - solution of a x = b for x
 
    * - ``b/a``
-     - Solve a.T x.T = b.T instead
+     - Solve ``a.T x.T = b.T`` instead
      - solution of x a = b for x
 
    * - ``[U,S,V]=svd(a)``
      - ``U, S, Vh = linalg.svd(a), V = Vh.T``
      - singular value decomposition of ``a``
 
-   * - ``chol(a)``
-     - ``linalg.cholesky(a).T``
-     - cholesky factorization of a matrix (``chol(a)`` in MATLAB returns an
-       upper triangular matrix, but ``linalg.cholesky(a)`` returns a lower
-       triangular matrix)
+   * - ``c=chol(a)`` where ``a==c'*c``
+     - ``c = linalg.cholesky(a)`` where ``a == c.conj().T@c``
+     - Cholesky factorization of a 2D array (like ``chol(a)`` in MATLAB,
+       :func:`~scipy.linalg.cholesky` returns an upper triangular 2D array by
+       default; pass ``lower=True`` to get the lower triangular factor)
 
    * - ``[V,D]=eig(a)``
      - ``D,V = linalg.eig(a)``
-     - eigenvalues and eigenvectors of ``a``
+     - eigenvalues :math:`\lambda` and eigenvectors :math:`\bar{v}` of ``a``,
+       where :math:`\lambda\bar{v}=\mathbf{a}\bar{v}`
 
    * - ``[V,D]=eig(a,b)``
-     - ``D,V = scipy.linalg.eig(a,b)``
-     - eigenvalues and eigenvectors of ``a``, ``b``
+     - ``D,V = linalg.eig(a, b)``
+     - eigenvalues :math:`\lambda` and eigenvectors :math:`\bar{v}` of
+       ``a``, ``b``
+       where :math:`\lambda\mathbf{b}\bar{v}=\mathbf{a}\bar{v}`
 
-   * - ``[V,D]=eigs(a,k)``
-     -
-     - find the ``k`` largest eigenvalues and eigenvectors of ``a``
+   * - ``[V,D]=eigs(a,3)``
+     - ``D,V = eigs(a, k = 3)``
+     - find the ``k=3`` largest eigenvalues and eigenvectors of 2D array, ``a``
 
    * - ``[Q,R,P]=qr(a,0)``
-     - ``Q,R = scipy.linalg.qr(a)``
+     - ``Q,R = linalg.qr(a)``
      - QR decomposition
 
-   * - ``[L,U,P]=lu(a)``
-     - ``L,U = scipy.linalg.lu(a)`` or ``LU,P=scipy.linalg.lu_factor(a)``
-     - LU decomposition (note: P(MATLAB) == transpose(P(numpy)) )
+   * - ``[L,U,P]=lu(a)`` where ``a==P'*L*U``
+     - ``P,L,U = linalg.lu(a)`` where ``a == P@L@U``
+     - LU decomposition (note: P(MATLAB) == transpose(P(NumPy)))
 
    * - ``conjgrad``
-     - ``scipy.sparse.linalg.cg``
+     - ``cg``
      - Conjugate gradients solver
 
    * - ``fft(a)``
-     - ``fft(a)``
+     - ``np.fft.fft(a)``
      - Fourier transform of ``a``
 
    * - ``ifft(a)``
-     - ``ifft(a)``
+     - ``np.fft.ifft(a)``
      - inverse Fourier transform of ``a``
 
    * - ``sort(a)``
-     - ``sort(a)`` or ``a.sort()``
-     - sort the matrix
+     - ``np.sort(a, axis=0)`` or ``a.sort(axis=0)``
+     - sort each column of a 2D array, ``a``
 
-   * - ``[b,I] = sortrows(a,i)``
-     - ``I = argsort(a[:,i]), b=a[I,:]``
-     - sort the rows of the matrix
+   * - ``sort(a, 2)``
+     - ``np.sort(a, axis = 1)`` or ``a.sort(axis = 1)``
+     - sort each row of 2D array, ``a``
 
-   * - ``regress(y,X)``
-     - ``linalg.lstsq(X,y)``
-     - multilinear regression
+   * - ``[b,I]=sortrows(a,1)``
+     - ``I = np.argsort(a[:, 0]); b = a[I,:]``
+     - save the array ``a`` as array ``b`` with rows sorted by the first column
+
+   * - ``x = Z\y``
+     - ``x = linalg.lstsq(Z, y)``
+     - perform a linear regression of the form :math:`\mathbf{Zx}=\mathbf{y}`
 
    * - ``decimate(x, q)``
-     - ``scipy.signal.resample(x, len(x)/q)``
+     - ``signal.resample(x, np.ceil(len(x)/q))``
      - downsample with low-pass filtering
 
    * - ``unique(a)``
-     - ``unique(a)``
-     -
+     - ``np.unique(a)``
+     - a vector of unique values in array ``a``
 
    * - ``squeeze(a)``
      - ``a.squeeze()``
-     -
+     - remove singleton dimensions of array ``a``. Note that MATLAB will always
+       return arrays of 2D or higher while NumPy will return arrays of 0D or
+       higher
 
 .. _numpy-for-matlab-users.notes:
 
@@ -605,11 +574,11 @@ Notes
 =====
 
 \ **Submatrix**: Assignment to a submatrix can be done with lists of
-indexes using the ``ix_`` command. E.g., for 2d array ``a``, one might
-do: ``ind=[1,3]; a[np.ix_(ind,ind)]+=100``.
+indices using the ``ix_`` command. E.g., for 2D array ``a``, one might
+do: ``ind=[1, 3]; a[np.ix_(ind, ind)] += 100``.
 
 \ **HELP**: There is no direct equivalent of MATLAB's ``which`` command,
-but the commands ``help`` and ``source`` will usually list the filename
+but the commands :func:`help` and :func:`numpy.source` will usually list the filename
 where the function is located. Python also has an ``inspect`` module (do
 ``import inspect``) which provides a ``getfile`` that often works.
 
@@ -627,35 +596,35 @@ Dijkstra <https://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html
 and a 'slice' index (inside parentheses); however, in Python, constructs
 like ``0:5`` can *only* be used as a slice index (inside square
 brackets). Thus the somewhat quirky ``r_`` object was created to allow
-numpy to have a similarly terse range construction mechanism. Note that
+NumPy to have a similarly terse range construction mechanism. Note that
 ``r_`` is not called like a function or a constructor, but rather
 *indexed* using square brackets, which allows the use of Python's slice
 syntax in the arguments.
 
-\ **LOGICOPS**: & or \| in NumPy is bitwise AND/OR, while in MATLAB &
-and \| are logical AND/OR. The difference should be clear to anyone with
-significant programming experience. The two can appear to work the same,
-but there are important differences. If you would have used MATLAB's &
-or \| operators, you should use the NumPy ufuncs
-logical\_and/logical\_or. The notable differences between MATLAB's and
-NumPy's & and \| operators are:
+\ **LOGICOPS**: ``&`` or ``|`` in NumPy is bitwise AND/OR, while in MATLAB ``&``
+and ``|`` are logical AND/OR. The two can appear to work the same,
+but there are important differences. If you would have used MATLAB's ``&``
+or ``|`` operators, you should use the NumPy ufuncs
+``logical_and``/``logical_or``. The notable differences between MATLAB's and
+NumPy's ``&`` and ``|`` operators are:
 
 -  Non-logical {0,1} inputs: NumPy's output is the bitwise AND of the
    inputs. MATLAB treats any non-zero value as 1 and returns the logical
-   AND. For example (3 & 4) in NumPy is 0, while in MATLAB both 3 and 4
-   are considered logical true and (3 & 4) returns 1.
+   AND. For example ``(3 & 4)`` in NumPy is ``0``, while in MATLAB both ``3``
+   and ``4``
+   are considered logical true and ``(3 & 4)`` returns ``1``.
 
 -  Precedence: NumPy's & operator is higher precedence than logical
-   operators like < and >; MATLAB's is the reverse.
+   operators like ``<`` and ``>``; MATLAB's is the reverse.
 
 If you know you have boolean arguments, you can get away with using
-NumPy's bitwise operators, but be careful with parentheses, like this: z
-= (x > 1) & (x < 2). The absence of NumPy operator forms of logical\_and
-and logical\_or is an unfortunate consequence of Python's design.
+NumPy's bitwise operators, but be careful with parentheses, like this: ``z
+= (x > 1) & (x < 2)``. The absence of NumPy operator forms of ``logical_and``
+and ``logical_or`` is an unfortunate consequence of Python's design.
 
 **RESHAPE and LINEAR INDEXING**: MATLAB always allows multi-dimensional
 arrays to be accessed using scalar or linear indices, NumPy does not.
-Linear indices are common in MATLAB programs, e.g. find() on a matrix
+Linear indices are common in MATLAB programs, e.g. ``find()`` on a matrix
 returns them, whereas NumPy's find behaves differently. When converting
 MATLAB code it might be necessary to first reshape a matrix to a linear
 sequence, perform some indexing operations and then reshape back. As
@@ -664,11 +633,132 @@ possible to do this fairly efficiently. Note that the scan order used by
 reshape in NumPy defaults to the 'C' order, whereas MATLAB uses the
 Fortran order. If you are simply converting to a linear sequence and
 back this doesn't matter. But if you are converting reshapes from MATLAB
-code which relies on the scan order, then this MATLAB code: z =
-reshape(x,3,4); should become z = x.reshape(3,4,order='F').copy() in
+code which relies on the scan order, then this MATLAB code: ``z =
+reshape(x,3,4);`` should become ``z = x.reshape(3,4,order='F').copy()`` in
 NumPy.
 
-Customizing Your Environment
+'array' or 'matrix'? Which should I use?
+========================================
+
+Historically, NumPy has provided a special matrix type, `np.matrix`, which
+is a subclass of ndarray which makes binary operations linear algebra
+operations. You may see it used in some existing code instead of `np.array`.
+So, which one to use?
+
+Short answer
+------------
+
+**Use arrays**.
+
+-  They support multidimensional array algebra that is supported in MATLAB
+-  They are the standard vector/matrix/tensor type of NumPy. Many NumPy
+   functions return arrays, not matrices.
+-  There is a clear distinction between element-wise operations and
+   linear algebra operations.
+-  You can have standard vectors or row/column vectors if you like.
+
+Until Python 3.5 the only disadvantage of using the array type was that you
+had to use ``dot`` instead of ``*`` to multiply (reduce) two tensors
+(scalar product, matrix vector multiplication etc.). Since Python 3.5 you
+can use the matrix multiplication ``@`` operator.
+
+Given the above, we intend to deprecate ``matrix`` eventually.
+
+Long answer
+-----------
+
+NumPy contains both an ``array`` class and a ``matrix`` class. The
+``array`` class is intended to be a general-purpose n-dimensional array
+for many kinds of numerical computing, while ``matrix`` is intended to
+facilitate linear algebra computations specifically. In practice there
+are only a handful of key differences between the two.
+
+-  Operators ``*`` and ``@``, functions ``dot()``, and ``multiply()``:
+
+   -  For ``array``, **``*`` means element-wise multiplication**, while
+      **``@`` means matrix multiplication**; they have associated functions
+      ``multiply()`` and ``dot()``.  (Before Python 3.5, ``@`` did not exist
+      and one had to use ``dot()`` for matrix multiplication).
+   -  For ``matrix``, **``*`` means matrix multiplication**, and for
+      element-wise multiplication one has to use the ``multiply()`` function.
+
+-  Handling of vectors (one-dimensional arrays)
+
+   -  For ``array``, the **vector shapes 1xN, Nx1, and N are all different
+      things**. Operations like ``A[:,1]`` return a one-dimensional array of
+      shape N, not a two-dimensional array of shape Nx1. Transpose on a
+      one-dimensional ``array`` does nothing.
+   -  For ``matrix``, **one-dimensional arrays are always upconverted to 1xN
+      or Nx1 matrices** (row or column vectors). ``A[:,1]`` returns a
+      two-dimensional matrix of shape Nx1.
+
+-  Handling of higher-dimensional arrays (ndim > 2)
+
+   -  ``array`` objects **can have number of dimensions > 2**;
+   -  ``matrix`` objects **always have exactly two dimensions**.
+
+-  Convenience attributes
+
+   -  ``array`` **has a .T attribute**, which returns the transpose of
+      the data.
+   -  ``matrix`` **also has .H, .I, and .A attributes**, which return
+      the conjugate transpose, inverse, and ``asarray()`` of the matrix,
+      respectively.
+
+-  Convenience constructor
+
+   -  The ``array`` constructor **takes (nested) Python sequences as
+      initializers**. As in, ``array([[1,2,3],[4,5,6]])``.
+   -  The ``matrix`` constructor additionally **takes a convenient
+      string initializer**. As in ``matrix("[1 2 3; 4 5 6]")``.
+
+There are pros and cons to using both:
+
+-  ``array``
+
+   -  ``:)`` Element-wise multiplication is easy: ``A*B``.
+   -  ``:(`` You have to remember that matrix multiplication has its own
+      operator, ``@``.
+   -  ``:)`` You can treat one-dimensional arrays as *either* row or column
+      vectors. ``A @ v`` treats ``v`` as a column vector, while
+      ``v @ A`` treats ``v`` as a row vector. This can save you having to
+      type a lot of transposes.
+   -  ``:)`` ``array`` is the "default" NumPy type, so it gets the most
+      testing, and is the type most likely to be returned by 3rd party
+      code that uses NumPy.
+   -  ``:)`` Is quite at home handling data of any number of dimensions.
+   -  ``:)`` Closer in semantics to tensor algebra, if you are familiar
+      with that.
+   -  ``:)`` *All* operations (``*``, ``/``, ``+``, ``-`` etc.) are
+      element-wise.
+   -  ``:(`` Sparse matrices from ``scipy.sparse`` do not interact as well
+      with arrays.
+
+-  ``matrix``
+
+   -  ``:\\`` Behavior is more like that of MATLAB matrices.
+   -  ``<:(`` Maximum of two-dimensional. To hold three-dimensional data you
+      need ``array`` or perhaps a Python list of ``matrix``.
+   -  ``<:(`` Minimum of two-dimensional. You cannot have vectors. They must be
+      cast as single-column or single-row matrices.
+   -  ``<:(`` Since ``array`` is the default in NumPy, some functions may
+      return an ``array`` even if you give them a ``matrix`` as an
+      argument. This shouldn't happen with NumPy functions (if it does
+      it's a bug), but 3rd party code based on NumPy may not honor type
+      preservation like NumPy does.
+   -  ``:)`` ``A*B`` is matrix multiplication, so it looks just like you write
+      it in linear algebra (For Python >= 3.5 plain arrays have the same
+      convenience with the ``@`` operator).
+   -  ``<:(`` Element-wise multiplication requires calling a function,
+      ``multiply(A,B)``.
+   -  ``<:(`` The use of operator overloading is a bit illogical: ``*``
+      does not work element-wise but ``/`` does.
+   -  Interaction with ``scipy.sparse`` is a bit cleaner.
+
+The ``array`` is thus much more advisable to use.  Indeed, we intend to
+deprecate ``matrix`` eventually.
+
+Customizing your environment
 ============================
 
 In MATLAB the main tool available to you for customizing the
@@ -696,26 +786,39 @@ this is just an example, not a statement of "best practices"):
 
     # Make all numpy available via shorter 'np' prefix
     import numpy as np
-    # Make all matlib functions accessible at the top level via M.func()
-    import numpy.matlib as M
-    # Make some matlib functions accessible directly at the top level via, e.g. rand(3,3)
-    from numpy.matlib import rand,zeros,ones,empty,eye
+    # 
+    # Make the SciPy linear algebra functions available as linalg.func()
+    # e.g. linalg.lu, linalg.eig (for general l*B@u==A@u solution)
+    from scipy import linalg
+    #
     # Define a Hermitian function
     def hermitian(A, **kwargs):
-        return np.transpose(A,**kwargs).conj()
-    # Make some shortcuts for transpose,hermitian:
-    #    np.transpose(A) --> T(A)
+        return np.conj(A,**kwargs).T
+    # Make a shortcut for hermitian:
     #    hermitian(A) --> H(A)
-    T = np.transpose
     H = hermitian
 
+To use the deprecated `matrix` and other `matlib` functions:
+
+::
+    
+    # Make all matlib functions accessible at the top level via M.func()
+    import numpy.matlib as M
+    # Make some matlib functions accessible directly at the top level via, e.g. rand(3,3)
+    from numpy.matlib import matrix,rand,zeros,ones,empty,eye
+
 Links
 =====
 
-See http://mathesaurus.sf.net/ for another MATLAB/NumPy
-cross-reference.
+Another somewhat outdated MATLAB/NumPy cross-reference can be found at
+http://mathesaurus.sf.net/
 
-An extensive list of tools for scientific work with python can be
+An extensive list of tools for scientific work with Python can be
 found in the `topical software page <https://scipy.org/topical-software.html>`__.
 
-MATLAB® and SimuLink® are registered trademarks of The MathWorks.
+See
+`List of Python software: scripting
+<https://en.wikipedia.org/wiki/List_of_Python_software#Embedded_as_a_scripting_language>`_
+for a list of software that uses Python as a scripting language.
+
+MATLAB® and SimuLink® are registered trademarks of The MathWorks, Inc.
diff --git a/doc/sphinxext b/doc/sphinxext
deleted file mode 160000
-Subproject b4c5fd17e2b85c2416a5e586933eee353b58bf7
diff --git a/doc_requirements.txt b/doc_requirements.txt
index e2694ba12..36b651c64 100644
--- a/doc_requirements.txt
+++ b/doc_requirements.txt
@@ -1,4 +1,5 @@
 sphinx>=2.2.0,<3.0
+numpydoc==1.1.0
 ipython
 scipy
 matplotlib
diff --git a/numpy/__init__.cython-30.pxd b/numpy/__init__.cython-30.pxd
index 4d9ec1fed..a2c451bc1 100644
--- a/numpy/__init__.cython-30.pxd
+++ b/numpy/__init__.cython-30.pxd
@@ -808,6 +808,29 @@ cdef extern from "numpy/ndarraytypes.h":
         int64_t num
 
 cdef extern from "numpy/arrayscalars.h":
+
+    # abstract types
+    ctypedef class numpy.generic [object PyObject]:
+        pass
+    ctypedef class numpy.number [object PyObject]:
+        pass
+    ctypedef class numpy.integer [object PyObject]:
+        pass
+    ctypedef class numpy.signedinteger [object PyObject]:
+        pass
+    ctypedef class numpy.unsignedinteger [object PyObject]:
+        pass
+    ctypedef class numpy.inexact [object PyObject]:
+        pass
+    ctypedef class numpy.floating [object PyObject]:
+        pass
+    ctypedef class numpy.complexfloating [object PyObject]:
+        pass
+    ctypedef class numpy.flexible [object PyObject]:
+        pass
+    ctypedef class numpy.character [object PyObject]:
+        pass
+
     ctypedef struct PyDatetimeScalarObject:
         # PyObject_HEAD
         npy_datetime obval
diff --git a/numpy/__init__.pxd b/numpy/__init__.pxd
index bf4298e59..fd704b7e3 100644
--- a/numpy/__init__.pxd
+++ b/numpy/__init__.pxd
@@ -766,6 +766,29 @@ cdef extern from "numpy/ndarraytypes.h":
         int64_t num
 
 cdef extern from "numpy/arrayscalars.h":
+
+    # abstract types
+    ctypedef class numpy.generic [object PyObject]:
+        pass
+    ctypedef class numpy.number [object PyObject]:
+        pass
+    ctypedef class numpy.integer [object PyObject]:
+        pass
+    ctypedef class numpy.signedinteger [object PyObject]:
+        pass
+    ctypedef class numpy.unsignedinteger [object PyObject]:
+        pass
+    ctypedef class numpy.inexact [object PyObject]:
+        pass
+    ctypedef class numpy.floating [object PyObject]:
+        pass
+    ctypedef class numpy.complexfloating [object PyObject]:
+        pass
+    ctypedef class numpy.flexible [object PyObject]:
+        pass
+    ctypedef class numpy.character [object PyObject]:
+        pass
+
     ctypedef struct PyDatetimeScalarObject:
         # PyObject_HEAD
         npy_datetime obval
diff --git a/numpy/__init__.py b/numpy/__init__.py
index 41c3dc42d..3e5277318 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -247,7 +247,7 @@ else:
             except KeyError:
                 pass
             else:
-                warnings.warn(msg, RuntimeWarning)
+                warnings.warn(msg, DeprecationWarning, stacklevel=2)
 
                 def _expired(*args, **kwds):
                     raise RuntimeError(msg)
diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index bf54207a4..d4eda6b31 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -66,10 +66,93 @@ from numpy.core.function_base import (
     geomspace,
 )
 
+from numpy.core.fromnumeric import (
+    take,
+    reshape,
+    choose,
+    repeat,
+    put,
+    swapaxes,
+    transpose,
+    partition,
+    argpartition,
+    sort,
+    argsort,
+    argmax,
+    argmin,
+    searchsorted,
+    resize,
+    squeeze,
+    diagonal,
+    trace,
+    ravel,
+    nonzero,
+    shape,
+    compress,
+    clip,
+    sum,
+    all,
+    any,
+    cumsum,
+    ptp,
+    amax,
+    amin,
+    prod,
+    cumprod,
+    ndim,
+    size,
+    around,
+    mean,
+    std,
+    var,
+)
+
 # Add an object to `__all__` if their stubs are defined in an external file;
 # their stubs will not be recognized otherwise.
 # NOTE: This is redundant for objects defined within this file.
-__all__ = ["linspace", "logspace", "geomspace"]
+__all__ = [
+    "linspace",
+    "logspace",
+    "geomspace",
+    "take",
+    "reshape",
+    "choose",
+    "repeat",
+    "put",
+    "swapaxes",
+    "transpose",
+    "partition",
+    "argpartition",
+    "sort",
+    "argsort",
+    "argmax",
+    "argmin",
+    "searchsorted",
+    "resize",
+    "squeeze",
+    "diagonal",
+    "trace",
+    "ravel",
+    "nonzero",
+    "shape",
+    "compress",
+    "clip",
+    "sum",
+    "all",
+    "any",
+    "cumsum",
+    "ptp",
+    "amax",
+    "amin",
+    "prod",
+    "cumprod",
+    "ndim",
+    "size",
+    "around",
+    "mean",
+    "std",
+    "var",
+]
 
 # TODO: remove when the full numpy namespace is defined
 def __getattr__(name: str) -> Any: ...
@@ -998,481 +1081,6 @@ def find_common_type(
     array_types: Sequence[DtypeLike], scalar_types: Sequence[DtypeLike]
 ) -> dtype: ...
 
-# Functions from np.core.fromnumeric
-_Mode = Literal["raise", "wrap", "clip"]
-_PartitionKind = Literal["introselect"]
-_SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
-_Side = Literal["left", "right"]
-
-# Various annotations for scalars
-
-# While dt.datetime and dt.timedelta are not technically part of NumPy,
-# they are one of the rare few builtin scalars which serve as valid return types.
-# See https://github.com/numpy/numpy-stubs/pull/67#discussion_r412604113.
-_ScalarNumpy = Union[generic, dt.datetime, dt.timedelta]
-_ScalarBuiltin = Union[str, bytes, dt.date, dt.timedelta, bool, int, float, complex]
-_Scalar = Union[_ScalarBuiltin, _ScalarNumpy]
-
-# Integers and booleans can generally be used interchangeably
-_ScalarIntOrBool = TypeVar("_ScalarIntOrBool", bound=Union[integer, bool_])
-_ScalarGeneric = TypeVar("_ScalarGeneric", bound=generic)
-_ScalarGenericDT = TypeVar(
-    "_ScalarGenericDT", bound=Union[dt.datetime, dt.timedelta, generic]
-)
-
-_Number = TypeVar('_Number', bound=number)
 _NumberLike = Union[int, float, complex, number, bool_]
-
-# An array-like object consisting of integers
 _IntLike = Union[int, integer]
 _BoolLike = Union[bool, bool_]
-_IntOrBool = Union[_IntLike, _BoolLike]
-_ArrayLikeIntNested = ArrayLike  # TODO: wait for support for recursive types
-_ArrayLikeBoolNested = ArrayLike  # TODO: wait for support for recursive types
-
-# Integers and booleans can generally be used interchangeably
-_ArrayLikeIntOrBool = Union[
-    _IntOrBool,
-    ndarray,
-    Sequence[_IntOrBool],
-    Sequence[_ArrayLikeIntNested],
-    Sequence[_ArrayLikeBoolNested],
-]
-_ArrayLikeBool = Union[
-    _BoolLike,
-    Sequence[_BoolLike],
-    ndarray
-]
-
-# The signature of take() follows a common theme with its overloads:
-# 1. A generic comes in; the same generic comes out
-# 2. A scalar comes in; a generic comes out
-# 3. An array-like object comes in; some keyword ensures that a generic comes out
-# 4. An array-like object comes in; an ndarray or generic comes out
-@overload
-def take(
-    a: _ScalarGenericDT,
-    indices: int,
-    axis: Optional[int] = ...,
-    out: Optional[ndarray] = ...,
-    mode: _Mode = ...,
-) -> _ScalarGenericDT: ...
-@overload
-def take(
-    a: _Scalar,
-    indices: int,
-    axis: Optional[int] = ...,
-    out: Optional[ndarray] = ...,
-    mode: _Mode = ...,
-) -> _ScalarNumpy: ...
-@overload
-def take(
-    a: ArrayLike,
-    indices: int,
-    axis: Optional[int] = ...,
-    out: Optional[ndarray] = ...,
-    mode: _Mode = ...,
-) -> _ScalarNumpy: ...
-@overload
-def take(
-    a: ArrayLike,
-    indices: _ArrayLikeIntOrBool,
-    axis: Optional[int] = ...,
-    out: Optional[ndarray] = ...,
-    mode: _Mode = ...,
-) -> Union[_ScalarNumpy, ndarray]: ...
-def reshape(a: ArrayLike, newshape: _ShapeLike, order: _OrderACF = ...) -> ndarray: ...
-@overload
-def choose(
-    a: _ScalarIntOrBool,
-    choices: ArrayLike,
-    out: Optional[ndarray] = ...,
-    mode: _Mode = ...,
-) -> _ScalarIntOrBool: ...
-@overload
-def choose(
-    a: _IntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ...
-) -> Union[integer, bool_]: ...
-@overload
-def choose(
-    a: _ArrayLikeIntOrBool,
-    choices: ArrayLike,
-    out: Optional[ndarray] = ...,
-    mode: _Mode = ...,
-) -> ndarray: ...
-def repeat(
-    a: ArrayLike, repeats: _ArrayLikeIntOrBool, axis: Optional[int] = ...
-) -> ndarray: ...
-def put(
-    a: ndarray, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ...
-) -> None: ...
-def swapaxes(a: ArrayLike, axis1: int, axis2: int) -> ndarray: ...
-def transpose(
-    a: ArrayLike, axes: Union[None, Sequence[int], ndarray] = ...
-) -> ndarray: ...
-def partition(
-    a: ArrayLike,
-    kth: _ArrayLikeIntOrBool,
-    axis: Optional[int] = ...,
-    kind: _PartitionKind = ...,
-    order: Union[None, str, Sequence[str]] = ...,
-) -> ndarray: ...
-@overload
-def argpartition(
-    a: generic,
-    kth: _ArrayLikeIntOrBool,
-    axis: Optional[int] = ...,
-    kind: _PartitionKind = ...,
-    order: Union[None, str, Sequence[str]] = ...,
-) -> integer: ...
-@overload
-def argpartition(
-    a: _ScalarBuiltin,
-    kth: _ArrayLikeIntOrBool,
-    axis: Optional[int] = ...,
-    kind: _PartitionKind = ...,
-    order: Union[None, str, Sequence[str]] = ...,
-) -> ndarray: ...
-@overload
-def argpartition(
-    a: ArrayLike,
-    kth: _ArrayLikeIntOrBool,
-    axis: Optional[int] = ...,
-    kind: _PartitionKind = ...,
-    order: Union[None, str, Sequence[str]] = ...,
-) -> ndarray: ...
-def sort(
-    a: ArrayLike,
-    axis: Optional[int] = ...,
-    kind: Optional[_SortKind] = ...,
-    order: Union[None, str, Sequence[str]] = ...,
-) -> ndarray: ...
-def argsort(
-    a: ArrayLike,
-    axis: Optional[int] = ...,
-    kind: Optional[_SortKind] = ...,
-    order: Union[None, str, Sequence[str]] = ...,
-) -> ndarray: ...
-@overload
-def argmax(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ...
-@overload
-def argmax(
-    a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ...
-) -> Union[integer, ndarray]: ...
-@overload
-def argmin(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ...
-@overload
-def argmin(
-    a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ...
-) -> Union[integer, ndarray]: ...
-@overload
-def searchsorted(
-    a: ArrayLike,
-    v: _Scalar,
-    side: _Side = ...,
-    sorter: Optional[_ArrayLikeIntOrBool] = ...,  # 1D int array
-) -> integer: ...
-@overload
-def searchsorted(
-    a: ArrayLike,
-    v: ArrayLike,
-    side: _Side = ...,
-    sorter: Optional[_ArrayLikeIntOrBool] = ...,  # 1D int array
-) -> ndarray: ...
-def resize(a: ArrayLike, new_shape: _ShapeLike) -> ndarray: ...
-@overload
-def squeeze(a: _ScalarGeneric, axis: Optional[_ShapeLike] = ...) -> _ScalarGeneric: ...
-@overload
-def squeeze(a: ArrayLike, axis: Optional[_ShapeLike] = ...) -> ndarray: ...
-def diagonal(
-    a: ArrayLike, offset: int = ..., axis1: int = ..., axis2: int = ...  # >= 2D array
-) -> ndarray: ...
-def trace(
-    a: ArrayLike,  # >= 2D array
-    offset: int = ...,
-    axis1: int = ...,
-    axis2: int = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-) -> Union[number, ndarray]: ...
-def ravel(a: ArrayLike, order: _OrderKACF = ...) -> ndarray: ...
-def nonzero(a: ArrayLike) -> Tuple[ndarray, ...]: ...
-def shape(a: ArrayLike) -> _Shape: ...
-def compress(
-    condition: ArrayLike,  # 1D bool array
-    a: ArrayLike,
-    axis: Optional[int] = ...,
-    out: Optional[ndarray] = ...,
-) -> ndarray: ...
-@overload
-def clip(
-    a: _Number,
-    a_min: ArrayLike,
-    a_max: Optional[ArrayLike],
-    out: Optional[ndarray] = ...,
-    **kwargs: Any,
-) -> _Number: ...
-@overload
-def clip(
-    a: _Number,
-    a_min: None,
-    a_max: ArrayLike,
-    out: Optional[ndarray] = ...,
-    **kwargs: Any,
-) -> _Number: ...
-@overload
-def clip(
-    a: ArrayLike,
-    a_min: ArrayLike,
-    a_max: Optional[ArrayLike],
-    out: Optional[ndarray] = ...,
-    **kwargs: Any,
-) -> Union[number, ndarray]: ...
-@overload
-def clip(
-    a: ArrayLike,
-    a_min: None,
-    a_max: ArrayLike,
-    out: Optional[ndarray] = ...,
-    **kwargs: Any,
-) -> Union[number, ndarray]: ...
-@overload
-def sum(
-    a: _Number,
-    axis: Optional[_ShapeLike] = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> _Number: ...
-@overload
-def sum(
-    a: ArrayLike,
-    axis: _ShapeLike = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> Union[number, ndarray]: ...
-@overload
-def all(
-    a: ArrayLike,
-    axis: None = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: Literal[False] = ...,
-) -> bool_: ...
-@overload
-def all(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-) -> Union[bool_, ndarray]: ...
-@overload
-def any(
-    a: ArrayLike,
-    axis: None = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: Literal[False] = ...,
-) -> bool_: ...
-@overload
-def any(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-) -> Union[bool_, ndarray]: ...
-def cumsum(
-    a: ArrayLike,
-    axis: Optional[int] = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-) -> ndarray: ...
-@overload
-def ptp(
-    a: _Number,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-) -> _Number: ...
-@overload
-def ptp(
-    a: ArrayLike,
-    axis: None = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: Literal[False] = ...,
-) -> number: ...
-@overload
-def ptp(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-) -> Union[number, ndarray]: ...
-@overload
-def amax(
-    a: _Number,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> _Number: ...
-@overload
-def amax(
-    a: ArrayLike,
-    axis: None = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: Literal[False] = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> number: ...
-@overload
-def amax(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> Union[number, ndarray]: ...
-@overload
-def amin(
-    a: _Number,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> _Number: ...
-@overload
-def amin(
-    a: ArrayLike,
-    axis: None = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: Literal[False] = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> number: ...
-@overload
-def amin(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> Union[number, ndarray]: ...
-
-# TODO: `np.prod()``: For object arrays `initial` does not necessarily
-# have to be a numerical scalar.
-# The only requirement is that it is compatible
-# with the `.__mul__()` method(s) of the passed array's elements.
-
-# Note that the same situation holds for all wrappers around
-# `np.ufunc.reduce`, e.g. `np.sum()` (`.__add__()`).
-
-@overload
-def prod(
-    a: _Number,
-    axis: Optional[_ShapeLike] = ...,
-    dtype: DtypeLike = ...,
-    out: None = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> _Number: ...
-@overload
-def prod(
-    a: ArrayLike,
-    axis: None = ...,
-    dtype: DtypeLike = ...,
-    out: None = ...,
-    keepdims: Literal[False] = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> number: ...
-@overload
-def prod(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-    initial: _NumberLike = ...,
-    where: _ArrayLikeBool = ...,
-) -> Union[number, ndarray]: ...
-def cumprod(
-    a: ArrayLike,
-    axis: Optional[int] = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-) -> ndarray: ...
-def ndim(a: ArrayLike) -> int: ...
-def size(a: ArrayLike, axis: Optional[int] = ...) -> int: ...
-@overload
-def around(
-    a: _Number, decimals: int = ..., out: Optional[ndarray] = ...
-) -> _Number: ...
-@overload
-def around(
-    a: _NumberLike, decimals: int = ..., out: Optional[ndarray] = ...
-) -> number: ...
-@overload
-def around(
-    a: ArrayLike, decimals: int = ..., out: Optional[ndarray] = ...
-) -> ndarray: ...
-@overload
-def mean(
-    a: ArrayLike,
-    axis: None = ...,
-    dtype: DtypeLike = ...,
-    out: None = ...,
-    keepdims: Literal[False] = ...,
-) -> number: ...
-@overload
-def mean(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-    keepdims: bool = ...,
-) -> Union[number, ndarray]: ...
-@overload
-def std(
-    a: ArrayLike,
-    axis: None = ...,
-    dtype: DtypeLike = ...,
-    out: None = ...,
-    ddof: int = ...,
-    keepdims: Literal[False] = ...,
-) -> number: ...
-@overload
-def std(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-    ddof: int = ...,
-    keepdims: bool = ...,
-) -> Union[number, ndarray]: ...
-@overload
-def var(
-    a: ArrayLike,
-    axis: None = ...,
-    dtype: DtypeLike = ...,
-    out: None = ...,
-    ddof: int = ...,
-    keepdims: Literal[False] = ...,
-) -> number: ...
-@overload
-def var(
-    a: ArrayLike,
-    axis: Optional[_ShapeLike] = ...,
-    dtype: DtypeLike = ...,
-    out: Optional[ndarray] = ...,
-    ddof: int = ...,
-    keepdims: bool = ...,
-) -> Union[number, ndarray]: ...
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 8ef9392d3..dc5c2577a 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -8,12 +8,12 @@ sys.path.insert(0, os.path.dirname(__file__))
 import ufunc_docstrings as docstrings
 sys.path.pop(0)
 
-Zero = "PyInt_FromLong(0)"
-One = "PyInt_FromLong(1)"
+Zero = "PyLong_FromLong(0)"
+One = "PyLong_FromLong(1)"
 True_ = "(Py_INCREF(Py_True), Py_True)"
 False_ = "(Py_INCREF(Py_False), Py_False)"
 None_ = object()
-AllOnes = "PyInt_FromLong(-1)"
+AllOnes = "PyLong_FromLong(-1)"
 MinusInfinity = 'PyFloat_FromDouble(-NPY_INFINITY)'
 ReorderableNone = "(Py_INCREF(Py_None), Py_None)"
 
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index b07def736..b1524b891 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -460,7 +460,7 @@ def repeat(a, repeats, axis=None):
     --------
     tile : Tile an array.
     unique : Find the unique elements of an array.
-    
+
     Examples
     --------
     >>> np.repeat(3, 4)
@@ -2007,8 +2007,8 @@ def compress(condition, a, axis=None, out=None):
     --------
     take, choose, diag, diagonal, select
     ndarray.compress : Equivalent method in ndarray
-    np.extract: Equivalent method when working on 1-D arrays
-    ufuncs-output-type
+    extract : Equivalent method when working on 1-D arrays
+    :ref:`ufuncs-output-type`
 
     Examples
     --------
@@ -2082,7 +2082,7 @@ def clip(a, a_min, a_max, out=None, **kwargs):
 
     See Also
     --------
-    ufuncs-output-type
+    :ref:`ufuncs-output-type`
 
     Examples
     --------
@@ -2278,7 +2278,7 @@ def any(a, axis=None, out=None, keepdims=np._NoValue):
         the same shape as the expected output and its type is preserved
         (e.g., if it is of type float, then it will remain so, returning
         1.0 for True and 0.0 for False, regardless of the type of `a`).
-        See `ufuncs-output-type` for more details.
+        See :ref:`ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2363,7 +2363,7 @@ def all(a, axis=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.
         It must have the same shape as the expected output and its
         type is preserved (e.g., if ``dtype(out)`` is float, the result
-        will consist of 0.0's and 1.0's). See `ufuncs-output-type` for more
+        will consist of 0.0's and 1.0's). See :ref:`ufuncs-output-type` for more
         details.
 
     keepdims : bool, optional
@@ -2442,7 +2442,7 @@ def cumsum(a, axis=None, dtype=None, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output
-        but the type will be cast if necessary. See `ufuncs-output-type` for
+        but the type will be cast if necessary. See :ref:`ufuncs-output-type` for
         more details.
 
     Returns
@@ -2613,7 +2613,7 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
     out : ndarray, optional
         Alternative output array in which to place the result.  Must
         be of the same shape and buffer length as the expected output.
-        See `ufuncs-output-type` for more details.
+        See :ref:`ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2738,7 +2738,7 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
     out : ndarray, optional
         Alternative output array in which to place the result.  Must
         be of the same shape and buffer length as the expected output.
-        See `ufuncs-output-type` for more details.
+        See :ref:`ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -2948,7 +2948,7 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     See Also
     --------
     ndarray.prod : equivalent method
-    ufuncs-output-type
+    :ref:`ufuncs-output-type`
 
     Notes
     -----
@@ -3044,7 +3044,7 @@ def cumprod(a, axis=None, dtype=None, out=None):
 
     See Also
     --------
-    ufuncs-output-type
+    :ref:`ufuncs-output-type`
 
     Notes
     -----
@@ -3190,7 +3190,7 @@ def around(a, decimals=0, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must have
         the same shape as the expected output, but the type of the output
-        values will be cast if necessary. See `ufuncs-output-type` for more
+        values will be cast if necessary. See :ref:`ufuncs-output-type` for more
         details.
 
     Returns
@@ -3305,7 +3305,7 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
         expected output, but the type will be cast if necessary.
-        See `ufuncs-output-type` for more details.
+        See :ref:`ufuncs-output-type` for more details.
 
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
@@ -3440,12 +3440,12 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     See Also
     --------
     var, mean, nanmean, nanstd, nanvar
-    ufuncs-output-type
+    :ref:`ufuncs-output-type`
 
     Notes
     -----
     The standard deviation is the square root of the average of the squared
-    deviations from the mean, i.e., ``std = sqrt(mean(x))``, where 
+    deviations from the mean, i.e., ``std = sqrt(mean(x))``, where
     ``x = abs(a - a.mean())**2``.
 
     The average squared deviation is typically calculated as ``x.sum() / N``,
@@ -3566,7 +3566,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     See Also
     --------
     std, mean, nanmean, nanstd, nanvar
-    ufuncs-output-type
+    :ref:`ufuncs-output-type`
 
     Notes
     -----
diff --git a/numpy/core/fromnumeric.pyi b/numpy/core/fromnumeric.pyi
new file mode 100644
index 000000000..7ad772b07
--- /dev/null
+++ b/numpy/core/fromnumeric.pyi
@@ -0,0 +1,492 @@
+import sys
+import datetime as dt
+from typing import Optional, Union, Sequence, Tuple, Any, overload, TypeVar
+
+from numpy import (
+    ndarray,
+    number,
+    integer,
+    bool_,
+    generic,
+    _OrderKACF,
+    _OrderACF,
+    _IntLike,
+    _BoolLike,
+    _NumberLike,
+)
+from numpy.typing import DtypeLike, ArrayLike, _ShapeLike, _Shape
+
+if sys.version_info >= (3, 8):
+    from typing import Literal
+else:
+    from typing_extensions import Literal
+
+_Mode = Literal["raise", "wrap", "clip"]
+_PartitionKind = Literal["introselect"]
+_SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
+_Side = Literal["left", "right"]
+
+# Various annotations for scalars
+
+# While dt.datetime and dt.timedelta are not technically part of NumPy,
+# they are one of the rare few builtin scalars which serve as valid return types.
+# See https://github.com/numpy/numpy-stubs/pull/67#discussion_r412604113.
+_ScalarNumpy = Union[generic, dt.datetime, dt.timedelta]
+_ScalarBuiltin = Union[str, bytes, dt.date, dt.timedelta, bool, int, float, complex]
+_Scalar = Union[_ScalarBuiltin, _ScalarNumpy]
+
+# Integers and booleans can generally be used interchangeably
+_ScalarIntOrBool = TypeVar("_ScalarIntOrBool", bound=Union[integer, bool_])
+_ScalarGeneric = TypeVar("_ScalarGeneric", bound=generic)
+_ScalarGenericDT = TypeVar(
+    "_ScalarGenericDT", bound=Union[dt.datetime, dt.timedelta, generic]
+)
+
+_Number = TypeVar("_Number", bound=number)
+
+# An array-like object consisting of integers
+_IntOrBool = Union[_IntLike, _BoolLike]
+_ArrayLikeIntNested = ArrayLike  # TODO: wait for support for recursive types
+_ArrayLikeBoolNested = ArrayLike  # TODO: wait for support for recursive types
+
+# Integers and booleans can generally be used interchangeably
+_ArrayLikeIntOrBool = Union[
+    _IntOrBool,
+    ndarray,
+    Sequence[_IntOrBool],
+    Sequence[_ArrayLikeIntNested],
+    Sequence[_ArrayLikeBoolNested],
+]
+_ArrayLikeBool = Union[_BoolLike, Sequence[_BoolLike], ndarray]
+
+# The signature of take() follows a common theme with its overloads:
+# 1. A generic comes in; the same generic comes out
+# 2. A scalar comes in; a generic comes out
+# 3. An array-like object comes in; some keyword ensures that a generic comes out
+# 4. An array-like object comes in; an ndarray or generic comes out
+@overload
+def take(
+    a: _ScalarGenericDT,
+    indices: int,
+    axis: Optional[int] = ...,
+    out: Optional[ndarray] = ...,
+    mode: _Mode = ...,
+) -> _ScalarGenericDT: ...
+@overload
+def take(
+    a: _Scalar,
+    indices: int,
+    axis: Optional[int] = ...,
+    out: Optional[ndarray] = ...,
+    mode: _Mode = ...,
+) -> _ScalarNumpy: ...
+@overload
+def take(
+    a: ArrayLike,
+    indices: int,
+    axis: Optional[int] = ...,
+    out: Optional[ndarray] = ...,
+    mode: _Mode = ...,
+) -> _ScalarNumpy: ...
+@overload
+def take(
+    a: ArrayLike,
+    indices: _ArrayLikeIntOrBool,
+    axis: Optional[int] = ...,
+    out: Optional[ndarray] = ...,
+    mode: _Mode = ...,
+) -> Union[_ScalarNumpy, ndarray]: ...
+def reshape(a: ArrayLike, newshape: _ShapeLike, order: _OrderACF = ...) -> ndarray: ...
+@overload
+def choose(
+    a: _ScalarIntOrBool,
+    choices: ArrayLike,
+    out: Optional[ndarray] = ...,
+    mode: _Mode = ...,
+) -> _ScalarIntOrBool: ...
+@overload
+def choose(
+    a: _IntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ...
+) -> Union[integer, bool_]: ...
+@overload
+def choose(
+    a: _ArrayLikeIntOrBool,
+    choices: ArrayLike,
+    out: Optional[ndarray] = ...,
+    mode: _Mode = ...,
+) -> ndarray: ...
+def repeat(
+    a: ArrayLike, repeats: _ArrayLikeIntOrBool, axis: Optional[int] = ...
+) -> ndarray: ...
+def put(
+    a: ndarray, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ...
+) -> None: ...
+def swapaxes(a: ArrayLike, axis1: int, axis2: int) -> ndarray: ...
+def transpose(
+    a: ArrayLike, axes: Union[None, Sequence[int], ndarray] = ...
+) -> ndarray: ...
+def partition(
+    a: ArrayLike,
+    kth: _ArrayLikeIntOrBool,
+    axis: Optional[int] = ...,
+    kind: _PartitionKind = ...,
+    order: Union[None, str, Sequence[str]] = ...,
+) -> ndarray: ...
+@overload
+def argpartition(
+    a: generic,
+    kth: _ArrayLikeIntOrBool,
+    axis: Optional[int] = ...,
+    kind: _PartitionKind = ...,
+    order: Union[None, str, Sequence[str]] = ...,
+) -> integer: ...
+@overload
+def argpartition(
+    a: _ScalarBuiltin,
+    kth: _ArrayLikeIntOrBool,
+    axis: Optional[int] = ...,
+    kind: _PartitionKind = ...,
+    order: Union[None, str, Sequence[str]] = ...,
+) -> ndarray: ...
+@overload
+def argpartition(
+    a: ArrayLike,
+    kth: _ArrayLikeIntOrBool,
+    axis: Optional[int] = ...,
+    kind: _PartitionKind = ...,
+    order: Union[None, str, Sequence[str]] = ...,
+) -> ndarray: ...
+def sort(
+    a: ArrayLike,
+    axis: Optional[int] = ...,
+    kind: Optional[_SortKind] = ...,
+    order: Union[None, str, Sequence[str]] = ...,
+) -> ndarray: ...
+def argsort(
+    a: ArrayLike,
+    axis: Optional[int] = ...,
+    kind: Optional[_SortKind] = ...,
+    order: Union[None, str, Sequence[str]] = ...,
+) -> ndarray: ...
+@overload
+def argmax(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ...
+@overload
+def argmax(
+    a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ...
+) -> Union[integer, ndarray]: ...
+@overload
+def argmin(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ...
+@overload
+def argmin(
+    a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ...
+) -> Union[integer, ndarray]: ...
+@overload
+def searchsorted(
+    a: ArrayLike,
+    v: _Scalar,
+    side: _Side = ...,
+    sorter: Optional[_ArrayLikeIntOrBool] = ...,  # 1D int array
+) -> integer: ...
+@overload
+def searchsorted(
+    a: ArrayLike,
+    v: ArrayLike,
+    side: _Side = ...,
+    sorter: Optional[_ArrayLikeIntOrBool] = ...,  # 1D int array
+) -> ndarray: ...
+def resize(a: ArrayLike, new_shape: _ShapeLike) -> ndarray: ...
+@overload
+def squeeze(a: _ScalarGeneric, axis: Optional[_ShapeLike] = ...) -> _ScalarGeneric: ...
+@overload
+def squeeze(a: ArrayLike, axis: Optional[_ShapeLike] = ...) -> ndarray: ...
+def diagonal(
+    a: ArrayLike, offset: int = ..., axis1: int = ..., axis2: int = ...  # >= 2D array
+) -> ndarray: ...
+def trace(
+    a: ArrayLike,  # >= 2D array
+    offset: int = ...,
+    axis1: int = ...,
+    axis2: int = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+) -> Union[number, ndarray]: ...
+def ravel(a: ArrayLike, order: _OrderKACF = ...) -> ndarray: ...
+def nonzero(a: ArrayLike) -> Tuple[ndarray, ...]: ...
+def shape(a: ArrayLike) -> _Shape: ...
+def compress(
+    condition: ArrayLike,  # 1D bool array
+    a: ArrayLike,
+    axis: Optional[int] = ...,
+    out: Optional[ndarray] = ...,
+) -> ndarray: ...
+@overload
+def clip(
+    a: _Number,
+    a_min: ArrayLike,
+    a_max: Optional[ArrayLike],
+    out: Optional[ndarray] = ...,
+    **kwargs: Any,
+) -> _Number: ...
+@overload
+def clip(
+    a: _Number,
+    a_min: None,
+    a_max: ArrayLike,
+    out: Optional[ndarray] = ...,
+    **kwargs: Any,
+) -> _Number: ...
+@overload
+def clip(
+    a: ArrayLike,
+    a_min: ArrayLike,
+    a_max: Optional[ArrayLike],
+    out: Optional[ndarray] = ...,
+    **kwargs: Any,
+) -> Union[number, ndarray]: ...
+@overload
+def clip(
+    a: ArrayLike,
+    a_min: None,
+    a_max: ArrayLike,
+    out: Optional[ndarray] = ...,
+    **kwargs: Any,
+) -> Union[number, ndarray]: ...
+@overload
+def sum(
+    a: _Number,
+    axis: Optional[_ShapeLike] = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> _Number: ...
+@overload
+def sum(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,  # explicit axis=None must type-check too
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> Union[number, ndarray]: ...
+@overload
+def all(
+    a: ArrayLike,
+    axis: None = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: Literal[False] = ...,
+) -> bool_: ...
+@overload
+def all(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+) -> Union[bool_, ndarray]: ...
+@overload
+def any(
+    a: ArrayLike,
+    axis: None = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: Literal[False] = ...,
+) -> bool_: ...
+@overload
+def any(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+) -> Union[bool_, ndarray]: ...
+def cumsum(
+    a: ArrayLike,
+    axis: Optional[int] = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+) -> ndarray: ...
+@overload
+def ptp(
+    a: _Number,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+) -> _Number: ...
+@overload
+def ptp(
+    a: ArrayLike,
+    axis: None = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: Literal[False] = ...,
+) -> number: ...
+@overload
+def ptp(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+) -> Union[number, ndarray]: ...
+@overload
+def amax(
+    a: _Number,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> _Number: ...
+@overload
+def amax(
+    a: ArrayLike,
+    axis: None = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: Literal[False] = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> number: ...
+@overload
+def amax(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> Union[number, ndarray]: ...
+@overload
+def amin(
+    a: _Number,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> _Number: ...
+@overload
+def amin(
+    a: ArrayLike,
+    axis: None = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: Literal[False] = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> number: ...
+@overload
+def amin(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> Union[number, ndarray]: ...
+
+# TODO: `np.prod()`: For object arrays `initial` does not necessarily
+# have to be a numerical scalar.
+# The only requirement is that it is compatible
+# with the `.__mul__()` method(s) of the passed array's elements.
+
+# Note that the same situation holds for all wrappers around
+# `np.ufunc.reduce`, e.g. `np.sum()` (`.__add__()`).
+@overload
+def prod(
+    a: _Number,
+    axis: Optional[_ShapeLike] = ...,
+    dtype: DtypeLike = ...,
+    out: None = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> _Number: ...
+@overload
+def prod(
+    a: ArrayLike,
+    axis: None = ...,
+    dtype: DtypeLike = ...,
+    out: None = ...,
+    keepdims: Literal[False] = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> number: ...
+@overload
+def prod(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+    initial: _NumberLike = ...,
+    where: _ArrayLikeBool = ...,
+) -> Union[number, ndarray]: ...
+def cumprod(
+    a: ArrayLike,
+    axis: Optional[int] = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+) -> ndarray: ...
+def ndim(a: ArrayLike) -> int: ...
+def size(a: ArrayLike, axis: Optional[int] = ...) -> int: ...
+@overload
+def around(
+    a: _Number, decimals: int = ..., out: Optional[ndarray] = ...
+) -> _Number: ...
+@overload
+def around(
+    a: _NumberLike, decimals: int = ..., out: Optional[ndarray] = ...
+) -> number: ...
+@overload
+def around(
+    a: ArrayLike, decimals: int = ..., out: Optional[ndarray] = ...
+) -> ndarray: ...
+@overload
+def mean(
+    a: ArrayLike,
+    axis: None = ...,
+    dtype: DtypeLike = ...,
+    out: None = ...,
+    keepdims: Literal[False] = ...,
+) -> number: ...
+@overload
+def mean(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+    keepdims: bool = ...,
+) -> Union[number, ndarray]: ...
+@overload
+def std(
+    a: ArrayLike,
+    axis: None = ...,
+    dtype: DtypeLike = ...,
+    out: None = ...,
+    ddof: int = ...,
+    keepdims: Literal[False] = ...,
+) -> number: ...
+@overload
+def std(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+    ddof: int = ...,
+    keepdims: bool = ...,
+) -> Union[number, ndarray]: ...
+@overload
+def var(
+    a: ArrayLike,
+    axis: None = ...,
+    dtype: DtypeLike = ...,
+    out: None = ...,
+    ddof: int = ...,
+    keepdims: Literal[False] = ...,
+) -> number: ...
+@overload
+def var(
+    a: ArrayLike,
+    axis: Optional[_ShapeLike] = ...,
+    dtype: DtypeLike = ...,
+    out: Optional[ndarray] = ...,
+    ddof: int = ...,
+    keepdims: bool = ...,
+) -> Union[number, ndarray]: ...
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 6eca4afdb..6bf54938f 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1839,6 +1839,10 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
             PyArray_DTypeMeta *cls, PyTypeObject *obj);
 
     typedef PyArray_Descr *(default_descr_function)(PyArray_DTypeMeta *cls);
+    typedef PyArray_DTypeMeta *(common_dtype_function)(
+            PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2);
+    typedef PyArray_Descr *(common_instance_function)(
+            PyArray_Descr *dtype1, PyArray_Descr *dtype2);
 
     /*
      * While NumPy DTypes would not need to be heap types the plan is to
@@ -1894,6 +1898,8 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
         discover_descr_from_pyobject_function *discover_descr_from_pyobject;
         is_known_scalar_type_function *is_known_scalar_type;
         default_descr_function *default_descr;
+        common_dtype_function *common_dtype;
+        common_instance_function *common_instance;
     };
 
 #endif  /* NPY_INTERNAL_BUILD */
diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h
index 4bc06fc96..191cd244f 100644
--- a/numpy/core/include/numpy/npy_3kcompat.h
+++ b/numpy/core/include/numpy/npy_3kcompat.h
@@ -28,6 +28,30 @@ extern "C" {
  * PyInt -> PyLong
  */
 
+
+/*
+ * This is a renamed copy of the Python non-limited API function _PyLong_AsInt. It is
+ * included here because it is missing from the PyPy API. It completes the PyLong_As*
+ * group of functions and can be useful in replacing PyInt_Check.
+ */
+static NPY_INLINE int
+Npy__PyLong_AsInt(PyObject *obj)
+{
+    int overflow;
+    long result = PyLong_AsLongAndOverflow(obj, &overflow);
+
+    /* INT_MAX and INT_MIN are defined in Python.h */
+    if (overflow || result > INT_MAX || result < INT_MIN) {
+        /* XXX: could be cute and give a different
+           message for overflow == -1 */
+        PyErr_SetString(PyExc_OverflowError,
+                        "Python int too large to convert to C int");
+        return -1;
+    }
+    return (int)result;
+}
+
+
 #if defined(NPY_PY3K)
 /* Return True only if the long fits in a C long */
 static NPY_INLINE int PyInt_Check(PyObject *op) {
@@ -39,6 +63,7 @@ static NPY_INLINE int PyInt_Check(PyObject *op) {
     return (overflow == 0);
 }
 
+
 #define PyInt_FromLong PyLong_FromLong
 #define PyInt_AsLong PyLong_AsLong
 #define PyInt_AS_LONG PyLong_AsLong
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 5706e0576..560b82912 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -262,11 +262,10 @@ typedef Py_uintptr_t npy_uintp;
 #define constchar char
 
 /* NPY_INTP_FMT Note:
- *      Unlike the other NPY_*_FMT macros which are used with
- *      PyOS_snprintf, NPY_INTP_FMT is used with PyErr_Format and
- *      PyString_Format. These functions use different formatting
- *      codes which are portably specified according to the Python
- *      documentation. See ticket #1795.
+ *      Unlike the other NPY_*_FMT macros, which are used with PyOS_snprintf,
+ *      NPY_INTP_FMT is used with PyErr_Format and PyUnicode_FromFormat. Those
+ *      functions use different formatting codes that are portably specified
+ *      according to the Python documentation. See issue gh-2388.
  */
 #if NPY_SIZEOF_PY_INTPTR_T == NPY_SIZEOF_INT
         #define NPY_INTP NPY_INT
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index 225c9554c..540d1ea9b 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -163,12 +163,12 @@ def concatenate(arrays, axis=None, out=None, *, dtype=None, casting=None):
         If provided, the destination array will have this dtype. Cannot be
         provided together with `out`.
 
-        ..versionadded:: 1.20.0
+        .. versionadded:: 1.20.0
 
     casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
         Controls what kind of data casting may occur. Defaults to 'same_kind'.
 
-        ..versionadded:: 1.20.0
+        .. versionadded:: 1.20.0
 
     Returns
     -------
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 2a015f48f..e705dd3ea 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -358,13 +358,15 @@ def issubsctype(arg1, arg2):
 
 @set_module('numpy')
 def issubdtype(arg1, arg2):
-    """
+    r"""
     Returns True if first argument is a typecode lower/equal in type hierarchy.
 
+    This is like the builtin :func:`issubclass`, but for `dtype`\ s.
+
     Parameters
     ----------
     arg1, arg2 : dtype_like
-        dtype or string representing a typecode.
+        `dtype` or object coercible to one
 
     Returns
     -------
@@ -372,15 +374,45 @@ def issubdtype(arg1, arg2):
 
     See Also
     --------
+    :ref:`arrays.scalars` : Overview of the numpy type hierarchy.
     issubsctype, issubclass_
-    numpy.core.numerictypes : Overview of numpy type hierarchy.
 
     Examples
     --------
-    >>> np.issubdtype('S1', np.string_)
+    `issubdtype` can be used to check the type of arrays:
+
+    >>> ints = np.array([1, 2, 3], dtype=np.int32)
+    >>> np.issubdtype(ints.dtype, np.integer)
+    True
+    >>> np.issubdtype(ints.dtype, np.floating)
+    False
+
+    >>> floats = np.array([1, 2, 3], dtype=np.float32)
+    >>> np.issubdtype(floats.dtype, np.integer)
+    False
+    >>> np.issubdtype(floats.dtype, np.floating)
     True
+
+    Similar types of different sizes are not subdtypes of each other:
+
     >>> np.issubdtype(np.float64, np.float32)
     False
+    >>> np.issubdtype(np.float32, np.float64)
+    False
+
+    but both are subdtypes of `floating`:
+
+    >>> np.issubdtype(np.float64, np.floating)
+    True
+    >>> np.issubdtype(np.float32, np.floating)
+    True
+
+    For convenience, dtype-like objects are allowed too:
+
+    >>> np.issubdtype('S1', np.string_)
+    True
+    >>> np.issubdtype('i4', np.signedinteger)
+    True
 
     """
     if not issubclass_(arg1, generic):
diff --git a/numpy/core/records.py b/numpy/core/records.py
index e95be0e3f..c2f6c6965 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -374,7 +374,7 @@ class recarray(ndarray):
 
     See Also
     --------
-    rec.fromrecords : Construct a record array from data.
+    core.records.fromrecords : Construct a record array from data.
     record : fundamental data-type for `recarray`.
     format_parser : determine a data-type from formats, names, titles.
 
@@ -630,7 +630,7 @@ def fromarrays(arrayList, dtype=None, shape=None, formats=None,
     >>> x1[1]=34
     >>> r.a
     array([1, 2, 3, 4])
-    
+
     >>> x1 = np.array([1, 2, 3, 4])
     >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
     >>> x3 = np.array([1.1, 2, 3,4])
@@ -911,7 +911,7 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
         shape = (shape,)
 
     if hasattr(fd, 'readinto'):
-        # GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase interface. 
+        # GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase interface.
         # Example of fd: gzip, BytesIO, BufferedReader
         # file already opened
         ctx = contextlib_nullcontext(fd)
@@ -958,7 +958,7 @@ def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
     """
     Construct a record array from a wide-variety of objects.
 
-    A general-purpose record array constructor that dispatches to the 
+    A general-purpose record array constructor that dispatches to the
     appropriate `recarray` creation function based on the inputs (see Notes).
 
     Parameters
@@ -996,7 +996,7 @@ def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
     `obj` is a string, then call the `fromstring` constructor. If `obj` is a
     list or a tuple, then if the first object is an `~numpy.ndarray`, call
     `fromarrays`, otherwise call `fromrecords`. If `obj` is a
-    `~numpy.recarray`, then make a copy of the data in the recarray 
+    `~numpy.recarray`, then make a copy of the data in the recarray
     (if ``copy=True``) and use the new formats, names, and titles. If `obj`
     is a file, then call `fromfile`. Finally, if obj is an `ndarray`, then
     return ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index fc4b2647a..ea04c82bd 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -186,7 +186,7 @@ test_neighborhood_iterator(PyObject* NPY_UNUSED(self), PyObject* args)
             Py_DECREF(bound);
             goto clean_itx;
         }
-        bounds[i] = PyInt_AsLong(bound);
+        bounds[i] = PyLong_AsSsize_t(bound);
         Py_DECREF(bound);
     }
 
@@ -345,7 +345,7 @@ test_neighborhood_iterator_oob(PyObject* NPY_UNUSED(self), PyObject* args)
             Py_DECREF(bound);
             goto clean_itx;
         }
-        bounds[i] = PyInt_AsLong(bound);
+        bounds[i] = PyLong_AsSsize_t(bound);
         Py_DECREF(bound);
     }
 
@@ -369,7 +369,7 @@ test_neighborhood_iterator_oob(PyObject* NPY_UNUSED(self), PyObject* args)
             Py_DECREF(bound);
             goto clean_itx;
         }
-        bounds[i] = PyInt_AsLong(bound);
+        bounds[i] = PyLong_AsSsize_t(bound);
         Py_DECREF(bound);
     }
 
@@ -1153,11 +1153,11 @@ array_solve_diophantine(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject
     }
 
     for (j = 0; j < nterms; ++j) {
-        terms[j].a = (npy_int64)PyInt_AsSsize_t(PyTuple_GET_ITEM(A, j));
+        terms[j].a = (npy_int64)PyLong_AsSsize_t(PyTuple_GET_ITEM(A, j));
         if (error_converting(terms[j].a)) {
             goto fail;
         }
-        terms[j].ub = (npy_int64)PyInt_AsSsize_t(PyTuple_GET_ITEM(U, j));
+        terms[j].ub = (npy_int64)PyLong_AsSsize_t(PyTuple_GET_ITEM(U, j));
         if (error_converting(terms[j].ub)) {
             goto fail;
         }
@@ -1733,8 +1733,8 @@ get_struct_alignments(PyObject *NPY_UNUSED(self), PyObject *args) {
 /**begin repeat
  * #N = 1,2,3#
  */
-    alignment = PyInt_FromLong(_ALIGN(struct TestStruct@N@));
-    size = PyInt_FromLong(sizeof(struct TestStruct@N@));
+    alignment = PyLong_FromLong(_ALIGN(struct TestStruct@N@));
+    size = PyLong_FromLong(sizeof(struct TestStruct@N@));
     val = PyTuple_Pack(2, alignment, size);
     Py_DECREF(alignment);
     Py_DECREF(size);
@@ -1950,7 +1950,7 @@ run_byteorder_converter(PyObject* NPY_UNUSED(self), PyObject *args)
         case NPY_SWAP: return PyUnicode_FromString("NPY_SWAP");
         case NPY_IGNORE: return PyUnicode_FromString("NPY_IGNORE");
     }
-    return PyInt_FromLong(byteorder);
+    return PyLong_FromLong(byteorder);
 }
 
 static PyObject *
@@ -1965,7 +1965,7 @@ run_sortkind_converter(PyObject* NPY_UNUSED(self), PyObject *args)
         case NPY_HEAPSORT: return PyUnicode_FromString("NPY_HEAPSORT");
         case NPY_STABLESORT: return PyUnicode_FromString("NPY_STABLESORT");
     }
-    return PyInt_FromLong(kind);
+    return PyLong_FromLong(kind);
 }
 
 static PyObject *
@@ -1978,7 +1978,7 @@ run_selectkind_converter(PyObject* NPY_UNUSED(self), PyObject *args)
     switch (kind) {
         case NPY_INTROSELECT: return PyUnicode_FromString("NPY_INTROSELECT");
     }
-    return PyInt_FromLong(kind);
+    return PyLong_FromLong(kind);
 }
 
 static PyObject *
@@ -1992,7 +1992,7 @@ run_searchside_converter(PyObject* NPY_UNUSED(self), PyObject *args)
         case NPY_SEARCHLEFT: return PyUnicode_FromString("NPY_SEARCHLEFT");
         case NPY_SEARCHRIGHT: return PyUnicode_FromString("NPY_SEARCHRIGHT");
     }
-    return PyInt_FromLong(side);
+    return PyLong_FromLong(side);
 }
 
 static PyObject *
@@ -2008,7 +2008,7 @@ run_order_converter(PyObject* NPY_UNUSED(self), PyObject *args)
         case NPY_FORTRANORDER: return PyUnicode_FromString("NPY_FORTRANORDER");
         case NPY_KEEPORDER: return PyUnicode_FromString("NPY_KEEPORDER");
     }
-    return PyInt_FromLong(order);
+    return PyLong_FromLong(order);
 }
 
 static PyObject *
@@ -2023,7 +2023,7 @@ run_clipmode_converter(PyObject* NPY_UNUSED(self), PyObject *args)
         case NPY_WRAP: return PyUnicode_FromString("NPY_WRAP");
         case NPY_RAISE: return PyUnicode_FromString("NPY_RAISE");
     }
-    return PyInt_FromLong(mode);
+    return PyLong_FromLong(mode);
 }
 
 static PyObject *
@@ -2040,7 +2040,7 @@ run_casting_converter(PyObject* NPY_UNUSED(self), PyObject *args)
         case NPY_SAME_KIND_CASTING: return PyUnicode_FromString("NPY_SAME_KIND_CASTING");
         case NPY_UNSAFE_CASTING: return PyUnicode_FromString("NPY_UNSAFE_CASTING");
     }
-    return PyInt_FromLong(casting);
+    return PyLong_FromLong(casting);
 }
 
 static PyObject *
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index 3f3fd1387..aae8d5141 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -306,51 +306,6 @@ discover_dtype_from_pyobject(
 }
 
 
-/*
- * This function should probably become public API eventually.  At this
- * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`.
- * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement
- * this logic.
- */
-static NPY_INLINE PyArray_Descr *
-cast_descriptor_to_fixed_dtype(
-        PyArray_Descr *descr, PyArray_DTypeMeta *fixed_DType)
-{
-    if (fixed_DType == NULL) {
-        /* Nothing to do, we only need to promote the new dtype */
-        Py_INCREF(descr);
-        return descr;
-    }
-
-    if (!fixed_DType->parametric) {
-        /*
-         * Don't actually do anything, the default is always the result
-         * of any cast.
-         */
-        return fixed_DType->default_descr(fixed_DType);
-    }
-    if (PyObject_TypeCheck((PyObject *)descr, (PyTypeObject *)fixed_DType)) {
-        Py_INCREF(descr);
-        return descr;
-    }
-    /*
-     * TODO: When this is implemented for all dtypes, the special cases
-     *       can be removed...
-     */
-    if (fixed_DType->legacy && fixed_DType->parametric &&
-            NPY_DTYPE(descr)->legacy) {
-        PyArray_Descr *flex_dtype = PyArray_DescrFromType(fixed_DType->type_num);
-        return PyArray_AdaptFlexibleDType(descr, flex_dtype);
-    }
-
-    PyErr_SetString(PyExc_NotImplementedError,
-            "Must use casting to find the correct dtype, this is "
-            "not yet implemented! "
-            "(It should not be possible to hit this code currently!)");
-    return NULL;
-}
-
-
 /**
  * Discover the correct descriptor from a known DType class and scalar.
  * If the fixed DType can discover a dtype instance/descr all is fine,
@@ -392,7 +347,7 @@ find_scalar_descriptor(
         return descr;
     }
 
-    Py_SETREF(descr, cast_descriptor_to_fixed_dtype(descr, fixed_DType));
+    Py_SETREF(descr, PyArray_CastDescrToDType(descr, fixed_DType));
     return descr;
 }
 
@@ -727,8 +682,13 @@ find_descriptor_from_array(
     enum _dtype_discovery_flags flags = 0;
     *out_descr = NULL;
 
-    if (NPY_UNLIKELY(DType != NULL && DType->parametric &&
-            PyArray_ISOBJECT(arr))) {
+    if (DType == NULL) {
+        *out_descr = PyArray_DESCR(arr);
+        Py_INCREF(*out_descr);
+        return 0;
+    }
+
+    if (NPY_UNLIKELY(DType->parametric && PyArray_ISOBJECT(arr))) {
         /*
          * We have one special case, if (and only if) the input array is of
          * object DType and the dtype is not fixed already but parametric.
@@ -777,7 +737,7 @@ find_descriptor_from_array(
         }
         Py_DECREF(iter);
     }
-    else if (DType != NULL && NPY_UNLIKELY(DType->type_num == NPY_DATETIME) &&
+    else if (NPY_UNLIKELY(DType->type_num == NPY_DATETIME) &&
                 PyArray_ISSTRING(arr)) {
         /*
          * TODO: This branch should be deprecated IMO, the workaround is
@@ -806,8 +766,7 @@ find_descriptor_from_array(
          * If this is not an object array figure out the dtype cast,
          * or simply use the returned DType.
          */
-        *out_descr = cast_descriptor_to_fixed_dtype(
-                     PyArray_DESCR(arr), DType);
+        *out_descr = PyArray_CastDescrToDType(PyArray_DESCR(arr), DType);
         if (*out_descr == NULL) {
             return -1;
         }
@@ -1325,15 +1284,9 @@ PyArray_DiscoverDTypeAndShape(
          * the correct default.
          */
         if (fixed_DType != NULL) {
-            if (fixed_DType->default_descr == NULL) {
-                Py_INCREF(fixed_DType->singleton);
-                *out_descr = fixed_DType->singleton;
-            }
-            else {
-                *out_descr = fixed_DType->default_descr(fixed_DType);
-                if (*out_descr == NULL) {
-                    goto fail;
-                }
+            *out_descr = fixed_DType->default_descr(fixed_DType);
+            if (*out_descr == NULL) {
+                goto fail;
             }
         }
     }
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 4767901ef..ecaca72a1 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -47,7 +47,7 @@ static NPY_INLINE npy_bool
 PySequence_NoString_Check(PyObject *op) {
     return
         PySequence_Check(op) &&
-        !PyString_Check(op) &&
+        !PyBytes_Check(op) &&
         !PyUnicode_Check(op) &&
         !PyArray_IsZeroDim(op);
 }
@@ -175,7 +175,7 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
  *
  * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, LONG, UINT, ULONG,
  *         LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE#
- * #func1 = PyBool_FromLong, PyInt_FromLong*6, PyLong_FromUnsignedLong*2,
+ * #func1 = PyBool_FromLong, PyLong_FromLong*6, PyLong_FromUnsignedLong*2,
  *          PyLong_FromLongLong, PyLong_FromUnsignedLongLong,
  *          MyPyFloat_FromHalf, PyFloat_FromDouble*2#
  * #func2 = PyObject_IsTrue, MyPyLong_AsLong*6, MyPyLong_AsUnsignedLong*2,
@@ -4461,7 +4461,7 @@ set_typeinfo(PyObject *dict)
             return -1;
         }
     }
-    key = PyInt_FromLong(NPY_@name2@);
+    key = PyLong_FromLong(NPY_@name2@);
     if (key == NULL) {
         return -1;
     }
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index d9121707b..f700bdc99 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -1019,7 +1019,7 @@ promote_types(PyArray_Descr *type1, PyArray_Descr *type2,
  * Returns a new reference to type if it is already NBO, otherwise
  * returns a copy converted to NBO.
  */
-static PyArray_Descr *
+NPY_NO_EXPORT PyArray_Descr *
 ensure_dtype_nbo(PyArray_Descr *type)
 {
     if (PyArray_ISNBO(type->byteorder)) {
@@ -1031,327 +1031,148 @@ ensure_dtype_nbo(PyArray_Descr *type)
     }
 }
 
-/*NUMPY_API
- * Produces the smallest size and lowest kind type to which both
- * input types can be cast.
+
+/**
+ * This function should possibly become public API eventually.  At this
+ * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`.
+ * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement
+ * this logic.
+ * Before that, the API needs to be reviewed though.
+ *
+ * WARNING: This function currently does not guarantee that `descr` can
+ *          actually be cast to the given DType.
+ *
+ * @param descr The dtype instance to adapt (i.e. "cast") to `given_DType`.
+ * @param given_DType The DType class for which we wish to find an instance able
+ *        to represent `descr`.
+ * @returns Instance of `given_DType`. If `given_DType` is parametric the
+ *          descr may be adapted to hold it.
  */
 NPY_NO_EXPORT PyArray_Descr *
-PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
+PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType)
 {
-    int type_num1, type_num2, ret_type_num;
-
-    /*
-     * Fast path for identical dtypes.
-     *
-     * Non-native-byte-order types are converted to native ones below, so we
-     * can't quit early.
-     */
-    if (type1 == type2 && PyArray_ISNBO(type1->byteorder)) {
-        Py_INCREF(type1);
-        return type1;
+    if (NPY_DTYPE(descr) == given_DType) {
+        Py_INCREF(descr);
+        return descr;
     }
-
-    type_num1 = type1->type_num;
-    type_num2 = type2->type_num;
-
-    /* If they're built-in types, use the promotion table */
-    if (type_num1 < NPY_NTYPES && type_num2 < NPY_NTYPES) {
-        ret_type_num = _npy_type_promotion_table[type_num1][type_num2];
+    if (!given_DType->parametric) {
         /*
-         * The table doesn't handle string/unicode/void/datetime/timedelta,
-         * so check the result
+         * Don't actually do anything, the default is always the result
+         * of any cast.
          */
-        if (ret_type_num >= 0) {
-            return PyArray_DescrFromType(ret_type_num);
-        }
+        return given_DType->default_descr(given_DType);
+    }
+    if (PyObject_TypeCheck((PyObject *)descr, (PyTypeObject *)given_DType)) {
+        Py_INCREF(descr);
+        return descr;
     }
-    /* If one or both are user defined, calculate it */
-    else {
-        int skind1 = NPY_NOSCALAR, skind2 = NPY_NOSCALAR, skind;
 
-        if (PyArray_CanCastTo(type2, type1)) {
-            /* Promoted types are always native byte order */
-            return ensure_dtype_nbo(type1);
-        }
-        else if (PyArray_CanCastTo(type1, type2)) {
-            /* Promoted types are always native byte order */
-            return ensure_dtype_nbo(type2);
-        }
+    if (!given_DType->legacy) {
+        PyErr_SetString(PyExc_NotImplementedError,
+                "Must use casting to find the correct DType for a parametric "
+                "user DType. This is not yet implemented (this error should be "
+                "unreachable).");
+        return NULL;
+    }
 
-        /* Convert the 'kind' char into a scalar kind */
-        switch (type1->kind) {
-            case 'b':
-                skind1 = NPY_BOOL_SCALAR;
-                break;
-            case 'u':
-                skind1 = NPY_INTPOS_SCALAR;
-                break;
-            case 'i':
-                skind1 = NPY_INTNEG_SCALAR;
-                break;
-            case 'f':
-                skind1 = NPY_FLOAT_SCALAR;
-                break;
-            case 'c':
-                skind1 = NPY_COMPLEX_SCALAR;
-                break;
-        }
-        switch (type2->kind) {
-            case 'b':
-                skind2 = NPY_BOOL_SCALAR;
-                break;
-            case 'u':
-                skind2 = NPY_INTPOS_SCALAR;
-                break;
-            case 'i':
-                skind2 = NPY_INTNEG_SCALAR;
-                break;
-            case 'f':
-                skind2 = NPY_FLOAT_SCALAR;
-                break;
-            case 'c':
-                skind2 = NPY_COMPLEX_SCALAR;
-                break;
-        }
+    PyArray_Descr *flex_dtype = PyArray_DescrNew(given_DType->singleton);
+    return PyArray_AdaptFlexibleDType(descr, flex_dtype);
+}
 
-        /* If both are scalars, there may be a promotion possible */
-        if (skind1 != NPY_NOSCALAR && skind2 != NPY_NOSCALAR) {
 
-            /* Start with the larger scalar kind */
-            skind = (skind1 > skind2) ? skind1 : skind2;
-            ret_type_num = _npy_smallest_type_of_kind_table[skind];
+/**
+ * This function defines the common DType operator.
+ *
+ * Note that the common DType will not be "object" (unless one of the dtypes
+ * is object), even though object can technically represent all values
+ * correctly.
+ *
+ * TODO: Before exposure, we should review the return value (e.g. no error
+ *       when no common DType is found).
+ *
+ * @param dtype1 DType class to find the common type for.
+ * @param dtype2 Second DType class.
+ * @return The common DType or NULL with an error set
+ */
+NPY_NO_EXPORT PyArray_DTypeMeta *
+PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2)
+{
+    if (dtype1 == dtype2) {
+        Py_INCREF(dtype1);
+        return dtype1;
+    }
 
-            for (;;) {
+    PyArray_DTypeMeta *common_dtype;
 
-                /* If there is no larger type of this kind, try a larger kind */
-                if (ret_type_num < 0) {
-                    ++skind;
-                    /* Use -1 to signal no promoted type found */
-                    if (skind < NPY_NSCALARKINDS) {
-                        ret_type_num = _npy_smallest_type_of_kind_table[skind];
-                    }
-                    else {
-                        break;
-                    }
-                }
+    common_dtype = dtype1->common_dtype(dtype1, dtype2);
+    if (common_dtype == (PyArray_DTypeMeta *)Py_NotImplemented) {
+        Py_DECREF(common_dtype);
+        common_dtype = dtype2->common_dtype(dtype2, dtype1);
+    }
+    if (common_dtype == NULL) {
+        return NULL;
+    }
+    if (common_dtype == (PyArray_DTypeMeta *)Py_NotImplemented) {
+        Py_DECREF(Py_NotImplemented);
+        PyErr_Format(PyExc_TypeError,
+                "The DTypes %S and %S do not have a common DType. "
+                "For example they cannot be stored in a single array unless "
+                "the dtype is `object`.", dtype1, dtype2);
+        return NULL;
+    }
+    return common_dtype;
+}
 
-                /* If we found a type to which we can promote both, done! */
-                if (PyArray_CanCastSafely(type_num1, ret_type_num) &&
-                            PyArray_CanCastSafely(type_num2, ret_type_num)) {
-                    return PyArray_DescrFromType(ret_type_num);
-                }
 
-                /* Try the next larger type of this kind */
-                ret_type_num = _npy_next_larger_type_table[ret_type_num];
-            }
+/*NUMPY_API
+ * Produces the smallest size and lowest kind type to which both
+ * input types can be cast.
+ */
+NPY_NO_EXPORT PyArray_Descr *
+PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
+{
+    PyArray_DTypeMeta *common_dtype;
+    PyArray_Descr *res;
 
-        }
+    /* Fast path for identical inputs (NOTE: This path preserves metadata!) */
+    if (type1 == type2 && PyArray_ISNBO(type1->byteorder)) {
+        Py_INCREF(type1);
+        return type1;
+    }
 
-        PyErr_SetString(PyExc_TypeError,
-                "invalid type promotion with custom data type");
+    common_dtype = PyArray_CommonDType(NPY_DTYPE(type1), NPY_DTYPE(type2));
+    if (common_dtype == NULL) {
         return NULL;
     }
 
-    switch (type_num1) {
-        /* BOOL can convert to anything except datetime/void */
-        case NPY_BOOL:
-            if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) {
-                int char_size = 1;
-                if (type_num2 == NPY_UNICODE) {
-                    char_size = 4;
-                }
-                if (type2->elsize < 5 * char_size) {
-                    PyArray_Descr *ret = NULL;
-                    PyArray_Descr *temp = PyArray_DescrNew(type2);
-                    ret = ensure_dtype_nbo(temp);
-                    ret->elsize = 5 * char_size;
-                    Py_DECREF(temp);
-                    return ret;
-                }
-                return ensure_dtype_nbo(type2);
-            }
-            else if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) {
-                return ensure_dtype_nbo(type2);
-            }
-            break;
-        /* For strings and unicodes, take the larger size */
-        case NPY_STRING:
-            if (type_num2 == NPY_STRING) {
-                if (type1->elsize > type2->elsize) {
-                    return ensure_dtype_nbo(type1);
-                }
-                else {
-                    return ensure_dtype_nbo(type2);
-                }
-            }
-            else if (type_num2 == NPY_UNICODE) {
-                if (type2->elsize >= type1->elsize * 4) {
-                    return ensure_dtype_nbo(type2);
-                }
-                else {
-                    PyArray_Descr *d = PyArray_DescrNewFromType(NPY_UNICODE);
-                    if (d == NULL) {
-                        return NULL;
-                    }
-                    d->elsize = type1->elsize * 4;
-                    return d;
-                }
-            }
-            /* Allow NUMBER -> STRING */
-            else if (PyTypeNum_ISNUMBER(type_num2)) {
-                PyArray_Descr *ret = NULL;
-                PyArray_Descr *temp = PyArray_DescrNew(type1);
-                PyDataType_MAKEUNSIZED(temp);
-
-                temp = PyArray_AdaptFlexibleDType(type2, temp);
-                if (temp == NULL) {
-                    return NULL;
-                }
-                if (temp->elsize > type1->elsize) {
-                    ret = ensure_dtype_nbo(temp);
-                }
-                else {
-                    ret = ensure_dtype_nbo(type1);
-                }
-                Py_DECREF(temp);
-                return ret;
-            }
-            break;
-        case NPY_UNICODE:
-            if (type_num2 == NPY_UNICODE) {
-                if (type1->elsize > type2->elsize) {
-                    return ensure_dtype_nbo(type1);
-                }
-                else {
-                    return ensure_dtype_nbo(type2);
-                }
-            }
-            else if (type_num2 == NPY_STRING) {
-                if (type1->elsize >= type2->elsize * 4) {
-                    return ensure_dtype_nbo(type1);
-                }
-                else {
-                    PyArray_Descr *d = PyArray_DescrNewFromType(NPY_UNICODE);
-                    if (d == NULL) {
-                        return NULL;
-                    }
-                    d->elsize = type2->elsize * 4;
-                    return d;
-                }
-            }
-            /* Allow NUMBER -> UNICODE */
-            else if (PyTypeNum_ISNUMBER(type_num2)) {
-                PyArray_Descr *ret = NULL;
-                PyArray_Descr *temp = PyArray_DescrNew(type1);
-                PyDataType_MAKEUNSIZED(temp);
-                temp = PyArray_AdaptFlexibleDType(type2, temp);
-                if (temp == NULL) {
-                    return NULL;
-                }
-                if (temp->elsize > type1->elsize) {
-                    ret = ensure_dtype_nbo(temp);
-                }
-                else {
-                    ret = ensure_dtype_nbo(type1);
-                }
-                Py_DECREF(temp);
-                return ret;
-            }
-            break;
-        case NPY_DATETIME:
-        case NPY_TIMEDELTA:
-            if (type_num2 == NPY_DATETIME || type_num2 == NPY_TIMEDELTA) {
-                return datetime_type_promotion(type1, type2);
-            }
-            break;
+    if (!common_dtype->parametric) {
+        res = common_dtype->default_descr(common_dtype);
+        Py_DECREF(common_dtype);
+        return res;
     }
 
-    switch (type_num2) {
-        /* BOOL can convert to almost anything */
-        case NPY_BOOL:
-            if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) {
-                int char_size = 1;
-                if (type_num2 == NPY_UNICODE) {
-                    char_size = 4;
-                }
-                if (type2->elsize < 5 * char_size) {
-                    PyArray_Descr *ret = NULL;
-                    PyArray_Descr *temp = PyArray_DescrNew(type2);
-                    ret = ensure_dtype_nbo(temp);
-                    ret->elsize = 5 * char_size;
-                    Py_DECREF(temp);
-                    return ret;
-                }
-                return ensure_dtype_nbo(type2);
-            }
-            else if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA &&
-                                    type_num1 != NPY_VOID) {
-                return ensure_dtype_nbo(type1);
-            }
-            break;
-        case NPY_STRING:
-            /* Allow NUMBER -> STRING */
-            if (PyTypeNum_ISNUMBER(type_num1)) {
-                PyArray_Descr *ret = NULL;
-                PyArray_Descr *temp = PyArray_DescrNew(type2);
-                PyDataType_MAKEUNSIZED(temp);
-                temp = PyArray_AdaptFlexibleDType(type1, temp);
-                if (temp == NULL) {
-                    return NULL;
-                }
-                if (temp->elsize > type2->elsize) {
-                    ret = ensure_dtype_nbo(temp);
-                }
-                else {
-                    ret = ensure_dtype_nbo(type2);
-                }
-                Py_DECREF(temp);
-                return ret;
-            }
-            break;
-        case NPY_UNICODE:
-            /* Allow NUMBER -> UNICODE */
-            if (PyTypeNum_ISNUMBER(type_num1)) {
-                PyArray_Descr *ret = NULL;
-                PyArray_Descr *temp = PyArray_DescrNew(type2);
-                PyDataType_MAKEUNSIZED(temp);
-                temp = PyArray_AdaptFlexibleDType(type1, temp);
-                if (temp == NULL) {
-                    return NULL;
-                }
-                if (temp->elsize > type2->elsize) {
-                    ret = ensure_dtype_nbo(temp);
-                }
-                else {
-                    ret = ensure_dtype_nbo(type2);
-                }
-                Py_DECREF(temp);
-                return ret;
-            }
-            break;
-        case NPY_TIMEDELTA:
-            if (PyTypeNum_ISSIGNED(type_num1)) {
-                return ensure_dtype_nbo(type2);
-            }
-            break;
+    /* Cast the input types to the common DType if necessary */
+    type1 = PyArray_CastDescrToDType(type1, common_dtype);
+    if (type1 == NULL) {
+        Py_DECREF(common_dtype);
+        return NULL;
     }
-
-    /* For types equivalent up to endianness, can return either */
-    if (PyArray_CanCastTypeTo(type1, type2, NPY_EQUIV_CASTING)) {
-        return ensure_dtype_nbo(type1);
+    type2 = PyArray_CastDescrToDType(type2, common_dtype);
+    if (type2 == NULL) {
+        Py_DECREF(type1);
+        Py_DECREF(common_dtype);
+        return NULL;
     }
 
-    /* TODO: Also combine fields, subarrays, strings, etc */
-
     /*
-    printf("invalid type promotion: ");
-    PyObject_Print(type1, stdout, 0);
-    printf(" ");
-    PyObject_Print(type2, stdout, 0);
-    printf("\n");
-    */
-    PyErr_SetString(PyExc_TypeError, "invalid type promotion");
-    return NULL;
+     * And find the common instance of the two inputs
+     * NOTE: Common instance normally preserves metadata (that of one input)
+     */
+    res = common_dtype->common_instance(type1, type2);
+    Py_DECREF(type1);
+    Py_DECREF(type2);
+    Py_DECREF(common_dtype);
+    return res;
 }
 
 /*
diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h
index 9b7f39db2..a2b36b497 100644
--- a/numpy/core/src/multiarray/convert_datatype.h
+++ b/numpy/core/src/multiarray/convert_datatype.h
@@ -10,6 +10,9 @@ PyArray_ObjectType(PyObject *op, int minimum_type);
 NPY_NO_EXPORT PyArrayObject **
 PyArray_ConvertToCommonType(PyObject *op, int *retn);
 
+NPY_NO_EXPORT PyArray_DTypeMeta *
+PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2);
+
 NPY_NO_EXPORT int
 PyArray_ValidType(int type);
 
@@ -18,6 +21,9 @@ NPY_NO_EXPORT npy_bool
 can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data,
                     PyArray_Descr *to, NPY_CASTING casting);
 
+NPY_NO_EXPORT PyArray_Descr *
+ensure_dtype_nbo(PyArray_Descr *type);
+
 NPY_NO_EXPORT int
 should_use_min_scalar(npy_intp narrs, PyArrayObject **arr,
                       npy_intp ndtypes, PyArray_Descr **dtypes);
@@ -49,4 +55,7 @@ npy_set_invalid_cast_error(
 NPY_NO_EXPORT PyArray_Descr *
 PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype);
 
+NPY_NO_EXPORT PyArray_Descr *
+PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType);
+
 #endif
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 95597b812..6e378f626 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -441,7 +441,7 @@ _convert_from_array_descr(PyObject *obj, int align)
         }
         PyObject *name = PyTuple_GET_ITEM(item, 0);
         PyObject *title;
-        if (PyBaseString_Check(name)) {
+        if (PyUnicode_Check(name)) {
             title = NULL;
         }
         else if (PyTuple_Check(name)) {
@@ -454,7 +454,7 @@ _convert_from_array_descr(PyObject *obj, int align)
             }
             title = PyTuple_GET_ITEM(name, 0);
             name = PyTuple_GET_ITEM(name, 1);
-            if (!PyBaseString_Check(name)) {
+            if (!PyUnicode_Check(name)) {
                 PyErr_SetString(PyExc_TypeError, "Field name must be a str");
                 goto fail;
             }
@@ -512,7 +512,7 @@ _convert_from_array_descr(PyObject *obj, int align)
         }
         if ((PyDict_GetItemWithError(fields, name) != NULL)
              || (title
-                 && PyBaseString_Check(title)
+                 && PyUnicode_Check(title)
                  && (PyDict_GetItemWithError(fields, title) != NULL))) {
             PyErr_Format(PyExc_ValueError,
                     "field %R occurs more than once", name);
@@ -550,7 +550,7 @@ _convert_from_array_descr(PyObject *obj, int align)
             if (PyDict_SetItem(fields, name, tup) < 0) {
                 goto fail;
             }
-            if (PyBaseString_Check(title)) {
+            if (PyUnicode_Check(title)) {
                 PyObject *existing = PyDict_GetItemWithError(fields, title);
                 if (existing == NULL && PyErr_Occurred()) {
                     goto fail;
@@ -1202,7 +1202,7 @@ _convert_from_dict(PyObject *obj, int align)
             Py_DECREF(tup);
             goto fail;
         }
-        if (!PyBaseString_Check(name)) {
+        if (!PyUnicode_Check(name)) {
             PyErr_SetString(PyExc_ValueError,
                     "field names must be strings");
             Py_DECREF(tup);
@@ -1228,7 +1228,7 @@ _convert_from_dict(PyObject *obj, int align)
             goto fail;
         }
         if (len == 3) {
-            if (PyBaseString_Check(title)) {
+            if (PyUnicode_Check(title)) {
                 if (PyDict_GetItemWithError(fields, title) != NULL) {
                     PyErr_SetString(PyExc_ValueError,
                             "title already used as a name or title.");
@@ -2153,7 +2153,7 @@ arraydescr_names_set(PyArray_Descr *self, PyObject *val)
         PyObject *item;
         int valid = 1;
         item = PySequence_GetItem(val, i);
-        valid = PyUString_Check(item);
+        valid = PyUnicode_Check(item);
         Py_DECREF(item);
         if (!valid) {
             PyErr_Format(PyExc_ValueError,
@@ -2788,7 +2788,7 @@ arraydescr_setstate(PyArray_Descr *self, PyObject *args)
 
         for (i = 0; i < PyTuple_GET_SIZE(names); ++i) {
             name = PyTuple_GET_ITEM(names, i);
-            if (!PyUString_Check(name)) {
+            if (!PyUnicode_Check(name)) {
                 names_ok = 0;
                 break;
             }
@@ -3020,7 +3020,7 @@ PyArray_DescrNewByteorder(PyArray_Descr *self, char newendian)
             if (NPY_TITLE_KEY(key, value)) {
                 continue;
             }
-            if (!PyUString_Check(key) || !PyTuple_Check(value) ||
+            if (!PyUnicode_Check(key) || !PyTuple_Check(value) ||
                 ((len=PyTuple_GET_SIZE(value)) < 2)) {
                 continue;
             }
@@ -3321,7 +3321,7 @@ _is_list_of_strings(PyObject *obj)
     seqlen = PyList_GET_SIZE(obj);
     for (i = 0; i < seqlen; i++) {
         PyObject *item = PyList_GET_ITEM(obj, i);
-        if (!PyBaseString_Check(item)) {
+        if (!PyUnicode_Check(item)) {
             return NPY_FALSE;
         }
     }
@@ -3431,7 +3431,7 @@ descr_subscript(PyArray_Descr *self, PyObject *op)
         return NULL;
     }
 
-    if (PyBaseString_Check(op)) {
+    if (PyUnicode_Check(op)) {
         return _subscript_by_name(self, op);
     }
     else if (_is_list_of_strings(op)) {
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 42c66ee7f..af4e6c22e 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -1322,95 +1322,21 @@ get_unicode_to_datetime_transfer_function(int aligned,
     return NPY_SUCCEED;
 }
 
+
 static int
-get_nbo_cast_transfer_function(int aligned,
-                            npy_intp src_stride, npy_intp dst_stride,
-                            PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
-                            int move_references,
-                            PyArray_StridedUnaryOp **out_stransfer,
-                            NpyAuxData **out_transferdata,
-                            int *out_needs_api,
-                            int *out_needs_wrap)
+get_legacy_dtype_cast_function(
+        int aligned, npy_intp src_stride, npy_intp dst_stride,
+        PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
+        int move_references,
+        PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata,
+        int *out_needs_api, int *out_needs_wrap)
 {
     _strided_cast_data *data;
     PyArray_VectorUnaryFunc *castfunc;
     PyArray_Descr *tmp_dtype;
-    npy_intp shape = 1, src_itemsize = src_dtype->elsize,
-            dst_itemsize = dst_dtype->elsize;
-
-    if (PyTypeNum_ISNUMBER(src_dtype->type_num) &&
-                    PyTypeNum_ISNUMBER(dst_dtype->type_num)) {
-        *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) ||
-                          !PyArray_ISNBO(dst_dtype->byteorder);
-        return get_nbo_cast_numeric_transfer_function(aligned,
-                                    src_stride, dst_stride,
-                                    src_dtype->type_num, dst_dtype->type_num,
-                                    out_stransfer, out_transferdata);
-    }
-
-    if (src_dtype->type_num == NPY_DATETIME ||
-            src_dtype->type_num == NPY_TIMEDELTA ||
-            dst_dtype->type_num == NPY_DATETIME ||
-            dst_dtype->type_num == NPY_TIMEDELTA) {
-        /* A parameterized type, datetime->datetime sometimes needs casting */
-        if ((src_dtype->type_num == NPY_DATETIME &&
-                    dst_dtype->type_num == NPY_DATETIME) ||
-                (src_dtype->type_num == NPY_TIMEDELTA &&
-                    dst_dtype->type_num == NPY_TIMEDELTA)) {
-            *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) ||
-                              !PyArray_ISNBO(dst_dtype->byteorder);
-            return get_nbo_cast_datetime_transfer_function(aligned,
-                                        src_stride, dst_stride,
-                                        src_dtype, dst_dtype,
-                                        out_stransfer, out_transferdata);
-        }
-
-        /*
-         * Datetime <-> string conversions can be handled specially.
-         * The functions may raise an error if the strings have no
-         * space, or can't be parsed properly.
-         */
-        if (src_dtype->type_num == NPY_DATETIME) {
-            switch (dst_dtype->type_num) {
-                case NPY_STRING:
-                    *out_needs_api = 1;
-                    *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder);
-                    return get_nbo_datetime_to_string_transfer_function(
-                                        aligned,
-                                        src_stride, dst_stride,
-                                        src_dtype, dst_dtype,
-                                        out_stransfer, out_transferdata);
-
-                case NPY_UNICODE:
-                    return get_datetime_to_unicode_transfer_function(
-                                        aligned,
-                                        src_stride, dst_stride,
-                                        src_dtype, dst_dtype,
-                                        out_stransfer, out_transferdata,
-                                        out_needs_api);
-            }
-        }
-        else if (dst_dtype->type_num == NPY_DATETIME) {
-            switch (src_dtype->type_num) {
-                case NPY_STRING:
-                    *out_needs_api = 1;
-                    *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder);
-                    return get_nbo_string_to_datetime_transfer_function(
-                                        aligned,
-                                        src_stride, dst_stride,
-                                        src_dtype, dst_dtype,
-                                        out_stransfer, out_transferdata);
-
-                case NPY_UNICODE:
-                    return get_unicode_to_datetime_transfer_function(
-                                        aligned,
-                                        src_stride, dst_stride,
-                                        src_dtype, dst_dtype,
-                                        out_stransfer, out_transferdata,
-                                        out_needs_api);
-            }
-        }
-    }
+    npy_intp shape = 1;
+    npy_intp src_itemsize = src_dtype->elsize;
+    npy_intp dst_itemsize = dst_dtype->elsize;
 
     *out_needs_wrap = !aligned ||
                       !PyArray_ISNBO(src_dtype->byteorder) ||
@@ -1543,6 +1469,167 @@ get_nbo_cast_transfer_function(int aligned,
     return NPY_SUCCEED;
 }
 
+
+static int
+get_nbo_cast_transfer_function(int aligned,
+                            npy_intp src_stride, npy_intp dst_stride,
+                            PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
+                            int move_references,
+                            PyArray_StridedUnaryOp **out_stransfer,
+                            NpyAuxData **out_transferdata,
+                            int *out_needs_api,
+                            int *out_needs_wrap)
+{
+    if (PyTypeNum_ISNUMBER(src_dtype->type_num) &&
+                    PyTypeNum_ISNUMBER(dst_dtype->type_num)) {
+        *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) ||
+                          !PyArray_ISNBO(dst_dtype->byteorder);
+        return get_nbo_cast_numeric_transfer_function(aligned,
+                                    src_stride, dst_stride,
+                                    src_dtype->type_num, dst_dtype->type_num,
+                                    out_stransfer, out_transferdata);
+    }
+
+    if (src_dtype->type_num == NPY_DATETIME ||
+            src_dtype->type_num == NPY_TIMEDELTA ||
+            dst_dtype->type_num == NPY_DATETIME ||
+            dst_dtype->type_num == NPY_TIMEDELTA) {
+        /* A parameterized type, datetime->datetime sometimes needs casting */
+        if ((src_dtype->type_num == NPY_DATETIME &&
+                    dst_dtype->type_num == NPY_DATETIME) ||
+                (src_dtype->type_num == NPY_TIMEDELTA &&
+                    dst_dtype->type_num == NPY_TIMEDELTA)) {
+            *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) ||
+                              !PyArray_ISNBO(dst_dtype->byteorder);
+            return get_nbo_cast_datetime_transfer_function(aligned,
+                                        src_stride, dst_stride,
+                                        src_dtype, dst_dtype,
+                                        out_stransfer, out_transferdata);
+        }
+
+        /*
+         * Datetime <-> string conversions can be handled specially.
+         * The functions may raise an error if the strings have no
+         * space, or can't be parsed properly.
+         */
+        if (src_dtype->type_num == NPY_DATETIME) {
+            switch (dst_dtype->type_num) {
+                case NPY_STRING:
+                    *out_needs_api = 1;
+                    *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder);
+                    return get_nbo_datetime_to_string_transfer_function(
+                                        aligned,
+                                        src_stride, dst_stride,
+                                        src_dtype, dst_dtype,
+                                        out_stransfer, out_transferdata);
+
+                case NPY_UNICODE:
+                    return get_datetime_to_unicode_transfer_function(
+                                        aligned,
+                                        src_stride, dst_stride,
+                                        src_dtype, dst_dtype,
+                                        out_stransfer, out_transferdata,
+                                        out_needs_api);
+            }
+        }
+        else if (dst_dtype->type_num == NPY_DATETIME) {
+            switch (src_dtype->type_num) {
+                case NPY_STRING:
+                    *out_needs_api = 1;
+                    *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder);
+                    return get_nbo_string_to_datetime_transfer_function(
+                                        aligned,
+                                        src_stride, dst_stride,
+                                        src_dtype, dst_dtype,
+                                        out_stransfer, out_transferdata);
+
+                case NPY_UNICODE:
+                    return get_unicode_to_datetime_transfer_function(
+                                        aligned,
+                                        src_stride, dst_stride,
+                                        src_dtype, dst_dtype,
+                                        out_stransfer, out_transferdata,
+                                        out_needs_api);
+            }
+        }
+    }
+
+    return get_legacy_dtype_cast_function(
+            aligned, src_stride, dst_stride, src_dtype, dst_dtype,
+            move_references, out_stransfer, out_transferdata,
+            out_needs_api, out_needs_wrap);
+}
+
+
+static int
+wrap_aligned_contig_transfer_function_with_copyswapn(
+        int aligned, npy_intp src_stride, npy_intp dst_stride,
+        PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
+        PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata,
+        int *out_needs_api,
+        PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata)
+{
+    NpyAuxData *todata = NULL, *fromdata = NULL;
+    PyArray_StridedUnaryOp *tobuffer, *frombuffer;
+    npy_intp src_itemsize = src_dtype->elsize;
+    npy_intp dst_itemsize = dst_dtype->elsize;
+
+    /* Get the copy/swap operation from src (src_stride -> src_itemsize) */
+    PyArray_GetDTypeCopySwapFn(
+            aligned, src_stride, src_itemsize, src_dtype, &tobuffer, &todata);
+
+    if (!PyDataType_REFCHK(dst_dtype)) {
+        /* Copying from buffer is a simple copy/swap operation */
+        PyArray_GetDTypeCopySwapFn(
+                aligned, dst_itemsize, dst_stride, dst_dtype,
+                &frombuffer, &fromdata);
+    }
+    else {
+        /*
+         * The buffer is initialized to NULL, so the references must be
+         * moved in order to DECREF the existing data.  Object types cannot
+         * be byte swapped and the loop already needs the Python API here.
+         */
+        assert(PyDataType_ISNOTSWAPPED(dst_dtype));
+        assert(*out_needs_api);
+        if (PyArray_GetDTypeTransferFunction(
+                aligned, dst_itemsize, dst_stride,
+                dst_dtype, dst_dtype, 1,
+                &frombuffer, &fromdata, out_needs_api) != NPY_SUCCEED) {
+            NPY_AUXDATA_FREE(castdata);  /* freed on every failure path */
+            NPY_AUXDATA_FREE(todata);
+            return NPY_FAIL;
+        }
+    }
+
+    if (frombuffer == NULL || tobuffer == NULL) {
+        NPY_AUXDATA_FREE(castdata);
+        NPY_AUXDATA_FREE(todata);
+        NPY_AUXDATA_FREE(fromdata);
+        return NPY_FAIL;
+    }
+
+    *out_stransfer = caststransfer;
+
+    /* Wrap it all up in a new transfer function + data */
+    if (wrap_aligned_contig_transfer_function(
+                        src_itemsize, dst_itemsize,
+                        tobuffer, todata,
+                        frombuffer, fromdata,
+                        caststransfer, castdata,
+                        PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT),
+                        *out_needs_api,
+                        out_stransfer, out_transferdata) != NPY_SUCCEED) {
+        NPY_AUXDATA_FREE(castdata);
+        NPY_AUXDATA_FREE(todata);
+        NPY_AUXDATA_FREE(fromdata);
+        return NPY_FAIL;
+    }
+
+    return NPY_SUCCEED;
+}
+
+
 static int
 get_cast_transfer_function(int aligned,
                             npy_intp src_stride, npy_intp dst_stride,
@@ -1553,10 +1640,8 @@ get_cast_transfer_function(int aligned,
                             int *out_needs_api)
 {
     PyArray_StridedUnaryOp *caststransfer;
-    NpyAuxData *castdata, *todata = NULL, *fromdata = NULL;
+    NpyAuxData *castdata;
     int needs_wrap = 0;
-    npy_intp src_itemsize = src_dtype->elsize,
-            dst_itemsize = dst_dtype->elsize;
 
     if (get_nbo_cast_transfer_function(aligned,
                             src_stride, dst_stride,
@@ -1581,64 +1666,10 @@ get_cast_transfer_function(int aligned,
     }
     /* Otherwise, we have to copy and/or swap to aligned temporaries */
     else {
-        PyArray_StridedUnaryOp *tobuffer, *frombuffer;
-
-        /* Get the copy/swap operation from src */
-        PyArray_GetDTypeCopySwapFn(aligned,
-                                src_stride, src_itemsize,
-                                src_dtype,
-                                &tobuffer, &todata);
-
-        if (!PyDataType_REFCHK(dst_dtype)) {
-            /* Copying from buffer is a simple copy/swap operation */
-            PyArray_GetDTypeCopySwapFn(aligned,
-                                    dst_itemsize, dst_stride,
-                                    dst_dtype,
-                                    &frombuffer, &fromdata);
-        }
-        else {
-            /*
-             * Since the buffer is initialized to NULL, need to move the
-             * references in order to DECREF the existing data.
-             */
-             /* Object types cannot be byte swapped */
-            assert(PyDataType_ISNOTSWAPPED(dst_dtype));
-            /* The loop already needs the python api if this is reached */
-            assert(*out_needs_api);
-
-            if (PyArray_GetDTypeTransferFunction(
-                    aligned, dst_itemsize, dst_stride,
-                    dst_dtype, dst_dtype, 1,
-                    &frombuffer, &fromdata, out_needs_api) != NPY_SUCCEED) {
-                return NPY_FAIL;
-            }
-        }
-
-        if (frombuffer == NULL || tobuffer == NULL) {
-            NPY_AUXDATA_FREE(castdata);
-            NPY_AUXDATA_FREE(todata);
-            NPY_AUXDATA_FREE(fromdata);
-            return NPY_FAIL;
-        }
-
-        *out_stransfer = caststransfer;
-
-        /* Wrap it all up in a new transfer function + data */
-        if (wrap_aligned_contig_transfer_function(
-                            src_itemsize, dst_itemsize,
-                            tobuffer, todata,
-                            frombuffer, fromdata,
-                            caststransfer, castdata,
-                            PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT),
-                            *out_needs_api,
-                            out_stransfer, out_transferdata) != NPY_SUCCEED) {
-            NPY_AUXDATA_FREE(castdata);
-            NPY_AUXDATA_FREE(todata);
-            NPY_AUXDATA_FREE(fromdata);
-            return NPY_FAIL;
-        }
-
-        return NPY_SUCCEED;
+        return wrap_aligned_contig_transfer_function_with_copyswapn(
+                aligned, src_stride, dst_stride, src_dtype, dst_dtype,
+                out_stransfer, out_transferdata, out_needs_api,
+                caststransfer, castdata);
     }
 }
 
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index 3026e68e9..531f746d8 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -15,6 +15,9 @@
 #include "dtypemeta.h"
 #include "_datetime.h"
 #include "array_coercion.h"
+#include "scalartypes.h"
+#include "convert_datatype.h"
+#include "usertypes.h"
 
 
 static void
@@ -194,6 +197,14 @@ discover_datetime_and_timedelta_from_pyobject(
 
 
 static PyArray_Descr *
+nonparametric_default_descr(PyArray_DTypeMeta *cls)
+{
+    Py_INCREF(cls->singleton);
+    return cls->singleton;
+}
+
+
+static PyArray_Descr *
 flexible_default_descr(PyArray_DTypeMeta *cls)
 {
     PyArray_Descr *res = PyArray_DescrNewFromType(cls->type_num);
@@ -208,6 +219,34 @@ flexible_default_descr(PyArray_DTypeMeta *cls)
 }
 
 
+static PyArray_Descr *
+string_unicode_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
+{
+    if (descr1->elsize >= descr2->elsize) {
+        return ensure_dtype_nbo(descr1);
+    }
+    else {
+        return ensure_dtype_nbo(descr2);
+    }
+}
+
+
+static PyArray_Descr *
+void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
+{
+    /*
+     * We currently do not support promotion of void types unless they
+     * are equivalent.
+     */
+    if (!PyArray_CanCastTypeTo(descr1, descr2, NPY_EQUIV_CASTING)) {
+        PyErr_SetString(PyExc_TypeError,
+                "invalid type promotion with structured or void datatype(s).");
+        return NULL;
+    }
+    Py_INCREF(descr1);
+    return descr1;
+}
+
 static int
 python_builtins_are_known_scalar_types(
         PyArray_DTypeMeta *NPY_UNUSED(cls), PyTypeObject *pytype)
@@ -253,7 +292,7 @@ datetime_known_scalar_types(
      * must take charge. Otherwise we would attempt casting which does not
      * truly support this. Only object arrays are special cased in this way.
      */
-    return (PyType_IsSubtype(pytype, &PyString_Type) ||
+    return (PyType_IsSubtype(pytype, &PyBytes_Type) ||
             PyType_IsSubtype(pytype, &PyUnicode_Type));
 }
 
@@ -281,6 +320,86 @@ string_known_scalar_types(
 }
 
 
+/*
+ * The following set of functions define the common dtype operator for
+ * the builtin types.
+ */
+static PyArray_DTypeMeta *
+default_builtin_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
+{
+    assert(cls->type_num < NPY_NTYPES);
+    if (!other->legacy || other->type_num > cls->type_num) {
+        /* Let the more generic (larger type number) DType handle this */
+        Py_INCREF(Py_NotImplemented);
+        return (PyArray_DTypeMeta *)Py_NotImplemented;
+    }
+
+    /*
+     * Note: The use of the promotion table should probably be revised at
+     *       some point. It may be most useful to remove it entirely and then
+     *       consider adding a fast path/cache `PyArray_CommonDType()` itself.
+     */
+    int common_num = _npy_type_promotion_table[cls->type_num][other->type_num];
+    if (common_num < 0) {
+        Py_INCREF(Py_NotImplemented);
+        return (PyArray_DTypeMeta *)Py_NotImplemented;
+    }
+    return PyArray_DTypeFromTypeNum(common_num);
+}
+
+
+static PyArray_DTypeMeta *
+string_unicode_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
+{
+    assert(cls->type_num < NPY_NTYPES);
+    if (!other->legacy || other->type_num > cls->type_num ||
+        other->type_num == NPY_OBJECT) {
+        /* Let the more generic (larger type number) DType handle this */
+        Py_INCREF(Py_NotImplemented);
+        return (PyArray_DTypeMeta *)Py_NotImplemented;
+    }
+    /*
+     * The builtin types are ordered by complexity (aside from object) here.
+     * Arguably, we should not consider numbers and strings "common", but
+     * we currently do.
+     */
+    Py_INCREF(cls);
+    return cls;
+}
+
+static PyArray_DTypeMeta *
+datetime_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
+{
+    if (cls->type_num == NPY_DATETIME && other->type_num == NPY_TIMEDELTA) {
+        /*
+         * TODO: We actually currently do allow promotion here. This is
+         *       currently relied on within `np.add(datetime, timedelta)`,
+         *       while for concatenation the cast step will fail.
+         */
+        Py_INCREF(cls);
+        return cls;
+    }
+    return default_builtin_common_dtype(cls, other);
+}
+
+
+
+static PyArray_DTypeMeta *
+object_common_dtype(
+        PyArray_DTypeMeta *cls, PyArray_DTypeMeta *NPY_UNUSED(other))
+{
+    /*
+     * The object DType is special in that it can represent everything,
+     * including all potential user DTypes.
+     * One reason to defer (or error) here might be if the other DType
+     * does not support scalars so that e.g. `arr1d[0]` returns a 0-D array
+     * and `arr.astype(object)` would fail. But object casts are special.
+     */
+    Py_INCREF(cls);
+    return cls;
+}
+
+
 /**
  * This function takes a PyArray_Descr and replaces its base class with
  * a newly created dtype subclass (DTypeMeta instances).
@@ -398,14 +517,28 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     dtype_class->f = descr->f;
     dtype_class->kind = descr->kind;
 
-    /* Strings and voids have (strange) logic around scalars. */
+    /* Set default functions (correct for most dtypes, override below) */
+    dtype_class->default_descr = nonparametric_default_descr;
+    dtype_class->discover_descr_from_pyobject = (
+            nonparametric_discover_descr_from_pyobject);
     dtype_class->is_known_scalar_type = python_builtins_are_known_scalar_types;
+    dtype_class->common_dtype = default_builtin_common_dtype;
+    dtype_class->common_instance = NULL;
 
-    if (PyTypeNum_ISDATETIME(descr->type_num)) {
+    if (PyTypeNum_ISUSERDEF(descr->type_num)) {
+        dtype_class->common_dtype = legacy_userdtype_common_dtype_function;
+    }
+    else if (descr->type_num == NPY_OBJECT) {
+        dtype_class->common_dtype = object_common_dtype;
+    }
+    else if (PyTypeNum_ISDATETIME(descr->type_num)) {
         /* Datetimes are flexible, but were not considered previously */
         dtype_class->parametric = NPY_TRUE;
+        dtype_class->default_descr = flexible_default_descr;
         dtype_class->discover_descr_from_pyobject = (
                 discover_datetime_and_timedelta_from_pyobject);
+        dtype_class->common_dtype = datetime_common_dtype;
+        dtype_class->common_instance = datetime_type_promotion;
         if (descr->type_num == NPY_DATETIME) {
             dtype_class->is_known_scalar_type = datetime_known_scalar_types;
         }
@@ -416,18 +549,16 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
         if (descr->type_num == NPY_VOID) {
             dtype_class->discover_descr_from_pyobject = (
                     void_discover_descr_from_pyobject);
+            dtype_class->common_instance = void_common_instance;
         }
         else {
             dtype_class->is_known_scalar_type = string_known_scalar_types;
             dtype_class->discover_descr_from_pyobject = (
                     string_discover_descr_from_pyobject);
+            dtype_class->common_dtype = string_unicode_common_dtype;
+            dtype_class->common_instance = string_unicode_common_instance;
         }
     }
-    else {
-        /* nonparametric case */
-        dtype_class->discover_descr_from_pyobject = (
-                nonparametric_discover_descr_from_pyobject);
-    }
 
     if (_PyArray_MapPyTypeToDType(dtype_class, descr->typeobj,
             PyTypeNum_ISUSERDEF(dtype_class->type_num)) < 0) {
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index e0909a7eb..83cf7c07e 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -2,6 +2,22 @@
 #define _NPY_DTYPEMETA_H
 
 #define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr))
+/*
+ * This function will hopefully be phased out or replaced, but was convenient
+ * for incremental implementation of new DTypes based on DTypeMeta.
+ * (Error checking is not required for DescrFromType, assuming that the
+ * type is valid.)
+ */
+static NPY_INLINE PyArray_DTypeMeta *
+PyArray_DTypeFromTypeNum(int typenum)
+{
+    PyArray_Descr *descr = PyArray_DescrFromType(typenum);
+    PyArray_DTypeMeta *dtype = NPY_DTYPE(descr);
+    Py_INCREF(dtype);
+    Py_DECREF(descr);
+    return dtype;
+}
+
 
 NPY_NO_EXPORT int
 dtypemeta_wrap_legacy_descriptor(PyArray_Descr *dtypem);
diff --git a/numpy/core/src/multiarray/hashdescr.c b/numpy/core/src/multiarray/hashdescr.c
index 0b23b6c21..c596a7098 100644
--- a/numpy/core/src/multiarray/hashdescr.c
+++ b/numpy/core/src/multiarray/hashdescr.c
@@ -132,7 +132,7 @@ static int _array_descr_walk_fields(PyObject *names, PyObject* fields, PyObject*
                     "(Hash) names and fields inconsistent ???");
             return -1;
         }
-        if (!PyUString_Check(key)) {
+        if (!PyUnicode_Check(key)) {
             PyErr_SetString(PyExc_SystemError,
                     "(Hash) key of dtype dict not a string ???");
             return -1;
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index 96f501c55..31795b2d0 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -61,7 +61,7 @@ parse_index_entry(PyObject *op, npy_intp *step_size,
     }
     else if (PySlice_Check(op)) {
         npy_intp stop;
-        if (NpySlice_GetIndicesEx(op, max, &i, &stop, step_size, n_steps) < 0) {
+        if (PySlice_GetIndicesEx(op, max, &i, &stop, step_size, n_steps) < 0) {
             goto fail;
         }
         if (*n_steps <= 0) {
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index fdf248c97..0998a6b49 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -233,7 +233,7 @@ unpack_indices(PyObject *index, PyObject **result, npy_intp result_n)
             || PySlice_Check(index)
             || PyArray_Check(index)
             || !PySequence_Check(index)
-            || PyBaseString_Check(index)) {
+            || PyUnicode_Check(index)) {
 
         return unpack_scalar(index, result, result_n);
     }
@@ -1407,7 +1407,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
     *view = NULL;
 
     /* first check for a single field name */
-    if (PyBaseString_Check(ind)) {
+    if (PyUnicode_Check(ind)) {
         PyObject *tup;
         PyArray_Descr *fieldtype;
         npy_intp offset;
@@ -1471,7 +1471,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
                 PyErr_Clear();
                 return -1;
             }
-            is_string = PyBaseString_Check(item);
+            is_string = PyUnicode_Check(item);
             Py_DECREF(item);
             if (!is_string) {
                 return -1;
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 276ceabc4..f1d5ab694 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -2057,7 +2057,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds)
                 Py_XDECREF(tmpobj);
                 return NULL;
             }
-            if (PyString_GET_SIZE(obj) < typecode->elsize) {
+            if (PyBytes_GET_SIZE(obj) < typecode->elsize) {
                 PyErr_SetString(PyExc_ValueError,
                         "initialization string is too small");
                 Py_XDECREF(tmpobj);
@@ -2226,7 +2226,7 @@ array_fromfile(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds)
         Py_DECREF(file);
         return NULL;
     }
-    if (PyString_Check(file) || PyUnicode_Check(file)) {
+    if (PyBytes_Check(file) || PyUnicode_Check(file)) {
         Py_SETREF(file, npy_PyFile_OpenFile(file, "rb"));
         if (file == NULL) {
             Py_XDECREF(type);
@@ -2793,7 +2793,7 @@ array_einsum(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
     arg0 = PyTuple_GET_ITEM(args, 0);
 
     /* einsum('i,j', a, b), einsum('i,j->ij', a, b) */
-    if (PyString_Check(arg0) || PyUnicode_Check(arg0)) {
+    if (PyBytes_Check(arg0) || PyUnicode_Check(arg0)) {
         nop = einsum_sub_op_from_str(args, &str_obj, &subscripts, op);
     }
     /* einsum(a, [0], b, [1]), einsum(a, [0], b, [1], [0,1]) */
@@ -3876,7 +3876,7 @@ _vec_string(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUSED(kw
     }
 
     if (PyArray_TYPE(char_array) == NPY_STRING) {
-        method = PyObject_GetAttr((PyObject *)&PyString_Type, method_name);
+        method = PyObject_GetAttr((PyObject *)&PyBytes_Type, method_name);
     }
     else if (PyArray_TYPE(char_array) == NPY_UNICODE) {
         method = PyObject_GetAttr((PyObject *)&PyUnicode_Type, method_name);
@@ -4337,7 +4337,7 @@ setup_scalartypes(PyObject *NPY_UNUSED(dict))
     if (PyType_Ready(&PyComplex_Type) < 0) {
         return -1;
     }
-    if (PyType_Ready(&PyString_Type) < 0) {
+    if (PyType_Ready(&PyBytes_Type) < 0) {
         return -1;
     }
     if (PyType_Ready(&PyUnicode_Type) < 0) {
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index b2f52f554..b918786f2 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -613,7 +613,7 @@ PyArray_DescrFromScalar(PyObject *sc)
         PyArray_DESCR_REPLACE(descr);
         type_num = descr->type_num;
         if (type_num == NPY_STRING) {
-            descr->elsize = PyString_GET_SIZE(sc);
+            descr->elsize = PyBytes_GET_SIZE(sc);
         }
         else if (type_num == NPY_UNICODE) {
             descr->elsize = PyUnicode_GET_LENGTH(sc) * 4;
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index c1bff1e42..5a3f4922a 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -149,7 +149,7 @@ static PyObject *
 gentype_add(PyObject *m1, PyObject* m2)
 {
     /* special case str.__radd__, which should not call array_add */
-    if (PyString_Check(m1) || PyUnicode_Check(m1)) {
+    if (PyBytes_Check(m1) || PyUnicode_Check(m1)) {
         Py_INCREF(Py_NotImplemented);
         return Py_NotImplemented;
     }
@@ -1058,7 +1058,7 @@ gentype_richcompare(PyObject *self, PyObject *other, int cmp_op)
 static PyObject *
 gentype_ndim_get(PyObject *NPY_UNUSED(self))
 {
-    return PyInt_FromLong(0);
+    return PyLong_FromLong(0);
 }
 
 static PyObject *
@@ -1099,7 +1099,7 @@ inttype_numerator_get(PyObject *self)
 static PyObject *
 inttype_denominator_get(PyObject *self)
 {
-    return PyInt_FromLong(1);
+    return PyLong_FromLong(1);
 }
 
 
@@ -1119,7 +1119,7 @@ gentype_itemsize_get(PyObject *self)
 
     typecode = PyArray_DescrFromScalar(self);
     elsize = typecode->elsize;
-    ret = PyInt_FromLong((long) elsize);
+    ret = PyLong_FromLong((long) elsize);
     Py_DECREF(typecode);
     return ret;
 }
@@ -1127,7 +1127,7 @@ gentype_itemsize_get(PyObject *self)
 static PyObject *
 gentype_size_get(PyObject *NPY_UNUSED(self))
 {
-    return PyInt_FromLong(1);
+    return PyLong_FromLong(1);
 }
 
 static PyObject *
@@ -1311,7 +1311,7 @@ gentype_imag_get(PyObject *self)
         ret = PyObject_GetAttrString(obj, "imag");
         if (ret == NULL) {
             PyErr_Clear();
-            obj = PyInt_FromLong(0);
+            obj = PyLong_FromLong(0);
             newtype = PyArray_DescrFromType(NPY_OBJECT);
             ret = PyArray_Scalar((char *)&obj, newtype, NULL);
             Py_DECREF(newtype);
@@ -2316,7 +2316,7 @@ voidtype_ass_subscript(PyVoidScalarObject *self, PyObject *ind, PyObject *val)
         return -1;
     }
 
-    if (PyBaseString_Check(ind)) {
+    if (PyUnicode_Check(ind)) {
         /*
          * Much like in voidtype_setfield, we cannot simply use ndarray's
          * __setitem__ since assignment to void scalars should not broadcast
@@ -2897,7 +2897,7 @@ bool_arrtype_nonzero(PyObject *a)
  *         ulong, ulonglong#
  * #Name = Byte, Short, Int, Long, UByte, UShort, LongLong, UInt,
  *         ULong, ULongLong#
- * #type = PyInt_FromLong*6, PyLong_FromLongLong*1,
+ * #type = PyLong_FromLong*6, PyLong_FromLongLong*1,
  *         PyLong_FromUnsignedLong*2, PyLong_FromUnsignedLongLong#
  */
 static PyNumberMethods @name@_arrtype_as_number;
@@ -2926,7 +2926,7 @@ bool_index(PyObject *a)
         return NULL;
     }
     else {
-        return PyInt_FromLong(PyArrayScalar_VAL(a, Bool));
+        return PyLong_FromLong(PyArrayScalar_VAL(a, Bool));
     }
 }
 
diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
index 6b6c6bd9d..3727567e0 100644
--- a/numpy/core/src/multiarray/usertypes.c
+++ b/numpy/core/src/multiarray/usertypes.c
@@ -38,6 +38,7 @@ maintainer email:  oliphant.travis@ieee.org
 
 #include "usertypes.h"
 #include "dtypemeta.h"
+#include "scalartypes.h"
 
 NPY_NO_EXPORT PyArray_Descr **userdescrs=NULL;
 
@@ -127,6 +128,9 @@ PyArray_InitArrFuncs(PyArray_ArrFuncs *f)
     f->scalarkind = NULL;
     f->cancastscalarkindto = NULL;
     f->cancastto = NULL;
+    f->fastclip = NULL;
+    f->fastputmask = NULL;
+    f->fasttake = NULL;
 }
 
 
@@ -347,3 +351,123 @@ PyArray_RegisterCanCast(PyArray_Descr *descr, int totype,
         return _append_new(&descr->f->cancastscalarkindto[scalar], totype);
     }
 }
+
+
+/*
+ * Legacy user DTypes implemented the common DType operation
+ * (as used in type promotion/result_type, and e.g. the type for
+ * concatenation), by using "safe cast" logic.
+ *
+ * New DTypes do have this behaviour generally, but we use can-cast
+ * when legacy user dtypes are involved.
+ */
+NPY_NO_EXPORT PyArray_DTypeMeta *
+legacy_userdtype_common_dtype_function(
+        PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
+{
+    int skind1 = NPY_NOSCALAR, skind2 = NPY_NOSCALAR, skind;
+
+    if (!other->legacy) {
+        /* legacy DTypes can always defer to new style ones */
+        Py_INCREF(Py_NotImplemented);
+        return (PyArray_DTypeMeta *)Py_NotImplemented;
+    }
+    /* Defer so that only one of the types handles the cast */
+    if (cls->type_num < other->type_num) {
+        Py_INCREF(Py_NotImplemented);
+        return (PyArray_DTypeMeta *)Py_NotImplemented;
+    }
+
+    /* Check whether casting is possible from one type to the other */
+    if (PyArray_CanCastSafely(cls->type_num, other->type_num)) {
+        Py_INCREF(other);
+        return other;
+    }
+    if (PyArray_CanCastSafely(other->type_num, cls->type_num)) {
+        Py_INCREF(cls);
+        return cls;
+    }
+
+    /*
+     * The following code used to be part of PyArray_PromoteTypes().
+     * We can expect that this code is never used.
+     * In principle, it allows for promotion of two different user dtypes
+     * to a single NumPy dtype of the same "kind". In practice
+     * using the same `kind` as NumPy was never possible due to a
+     * simplification where `PyArray_EquivTypes(descr1, descr2)` will
+     * return True if both kind and element size match (e.g. bfloat16 and
+     * float16 would be equivalent).
+     * The option is also very obscure and not used in the examples.
+     */
+
+    /* Convert the 'kind' char into a scalar kind */
+    switch (cls->kind) {
+        case 'b':
+            skind1 = NPY_BOOL_SCALAR;
+            break;
+        case 'u':
+            skind1 = NPY_INTPOS_SCALAR;
+            break;
+        case 'i':
+            skind1 = NPY_INTNEG_SCALAR;
+            break;
+        case 'f':
+            skind1 = NPY_FLOAT_SCALAR;
+            break;
+        case 'c':
+            skind1 = NPY_COMPLEX_SCALAR;
+            break;
+    }
+    switch (other->kind) {
+        case 'b':
+            skind2 = NPY_BOOL_SCALAR;
+            break;
+        case 'u':
+            skind2 = NPY_INTPOS_SCALAR;
+            break;
+        case 'i':
+            skind2 = NPY_INTNEG_SCALAR;
+            break;
+        case 'f':
+            skind2 = NPY_FLOAT_SCALAR;
+            break;
+        case 'c':
+            skind2 = NPY_COMPLEX_SCALAR;
+            break;
+    }
+
+    /* If both kinds map to a scalar kind, a promotion may be possible */
+    if (skind1 != NPY_NOSCALAR && skind2 != NPY_NOSCALAR) {
+
+        /* Start with the larger scalar kind */
+        skind = (skind1 > skind2) ? skind1 : skind2;
+        int ret_type_num = _npy_smallest_type_of_kind_table[skind];
+
+        for (;;) {
+
+            /* If there is no larger type of this kind, try a larger kind */
+            if (ret_type_num < 0) {
+                ++skind;
+                /* Stop once there is no larger scalar kind left to try */
+                if (skind < NPY_NSCALARKINDS) {
+                    ret_type_num = _npy_smallest_type_of_kind_table[skind];
+                }
+                else {
+                    break;
+                }
+            }
+
+            /* If we found a type to which we can promote both, done! */
+            if (PyArray_CanCastSafely(cls->type_num, ret_type_num) &&
+                PyArray_CanCastSafely(other->type_num, ret_type_num)) {
+                return PyArray_DTypeFromTypeNum(ret_type_num);
+            }
+
+            /* Try the next larger type of this kind */
+            ret_type_num = _npy_next_larger_type_table[ret_type_num];
+        }
+    }
+
+    Py_INCREF(Py_NotImplemented);
+    return (PyArray_DTypeMeta *)Py_NotImplemented;
+}
diff --git a/numpy/core/src/multiarray/usertypes.h b/numpy/core/src/multiarray/usertypes.h
index b3e386c5c..1b323d458 100644
--- a/numpy/core/src/multiarray/usertypes.h
+++ b/numpy/core/src/multiarray/usertypes.h
@@ -17,4 +17,8 @@ NPY_NO_EXPORT int
 PyArray_RegisterCastFunc(PyArray_Descr *descr, int totype,
                          PyArray_VectorUnaryFunc *castfunc);
 
+NPY_NO_EXPORT PyArray_DTypeMeta *
+legacy_userdtype_common_dtype_function(
+        PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other);
+
 #endif
diff --git a/numpy/core/src/umath/_rational_tests.c.src b/numpy/core/src/umath/_rational_tests.c.src
index e611a0847..08c259d98 100644
--- a/numpy/core/src/umath/_rational_tests.c.src
+++ b/numpy/core/src/umath/_rational_tests.c.src
@@ -430,7 +430,7 @@ pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) {
         PyObject* y;
         int eq;
         x[i] = PyTuple_GET_ITEM(args, i);
-        n[i] = PyInt_AsLong(x[i]);
+        n[i] = PyLong_AsLong(x[i]);
         if (error_converting(n[i])) {
             if (PyErr_ExceptionMatches(PyExc_TypeError)) {
                 PyErr_Format(PyExc_TypeError,
@@ -441,7 +441,7 @@ pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) {
             return 0;
         }
         /* Check that we had an exact integer */
-        y = PyInt_FromLong(n[i]);
+        y = PyLong_FromLong(n[i]);
         if (!y) {
             return 0;
         }
@@ -478,7 +478,7 @@ pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) {
         else { \
             PyObject* y_; \
             int eq_; \
-            long n_ = PyInt_AsLong(object); \
+            long n_ = PyLong_AsLong(object); \
             if (error_converting(n_)) { \
                 if (PyErr_ExceptionMatches(PyExc_TypeError)) { \
                     PyErr_Clear(); \
@@ -487,7 +487,7 @@ pyrational_new(PyTypeObject* type, PyObject* args, PyObject* kwds) {
                 } \
                 return 0; \
             } \
-            y_ = PyInt_FromLong(n_); \
+            y_ = PyLong_FromLong(n_); \
             if (!y_) { \
                 return 0; \
             } \
@@ -591,7 +591,7 @@ RATIONAL_BINOP_2(floor_divide,
     }
 RATIONAL_UNOP(negative,rational,rational_negative(x),PyRational_FromRational)
 RATIONAL_UNOP(absolute,rational,rational_abs(x),PyRational_FromRational)
-RATIONAL_UNOP(int,long,rational_int(x),PyInt_FromLong)
+RATIONAL_UNOP(int,long,rational_int(x),PyLong_FromLong)
 RATIONAL_UNOP(float,double,rational_double(x),PyFloat_FromDouble)
 
 static PyObject*
@@ -647,12 +647,12 @@ static PyNumberMethods pyrational_as_number = {
 
 static PyObject*
 pyrational_n(PyObject* self, void* closure) {
-    return PyInt_FromLong(((PyRational*)self)->r.n);
+    return PyLong_FromLong(((PyRational*)self)->r.n);
 }
 
 static PyObject*
 pyrational_d(PyObject* self, void* closure) {
-    return PyInt_FromLong(d(((PyRational*)self)->r));
+    return PyLong_FromLong(d(((PyRational*)self)->r));
 }
 
 static PyGetSetDef pyrational_getset[] = {
@@ -727,17 +727,17 @@ npyrational_setitem(PyObject* item, void* data, void* arr) {
         r = ((PyRational*)item)->r;
     }
     else {
-        long n = PyInt_AsLong(item);
+        long long n = PyLong_AsLongLong(item);
         PyObject* y;
         int eq;
         if (error_converting(n)) {
             return -1;
         }
-        y = PyInt_FromLong(n);
+        y = PyLong_FromLongLong(n);
         if (!y) {
             return -1;
         }
-        eq = PyObject_RichCompareBool(item,y,Py_EQ);
+        eq = PyObject_RichCompareBool(item, y, Py_EQ);
         Py_DECREF(y);
         if (eq<0) {
             return -1;
@@ -749,7 +749,7 @@ npyrational_setitem(PyObject* item, void* data, void* arr) {
         }
         r = make_rational_int(n);
     }
-    memcpy(data,&r,sizeof(rational));
+    memcpy(data, &r, sizeof(rational));
     return 0;
 }
 
diff --git a/numpy/core/src/umath/funcs.inc.src b/numpy/core/src/umath/funcs.inc.src
index 273779ee8..9b04dc779 100644
--- a/numpy/core/src/umath/funcs.inc.src
+++ b/numpy/core/src/umath/funcs.inc.src
@@ -26,13 +26,13 @@ Py_square(PyObject *o)
 static PyObject *
 Py_get_one(PyObject *NPY_UNUSED(o))
 {
-    return PyInt_FromLong(1);
+    return PyLong_FromLong(1);
 }
 
 static PyObject *
 Py_reciprocal(PyObject *o)
 {
-    PyObject *one = PyInt_FromLong(1);
+    PyObject *one = PyLong_FromLong(1);
     PyObject *result;
 
     if (!one) {
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index aa6f34d59..3abeb2c5a 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -236,21 +236,6 @@ PyUFunc_ValidateCasting(PyUFuncObject *ufunc,
     return 0;
 }
 
-/*
- * Returns a new reference to type if it is already NBO, otherwise
- * returns a copy converted to NBO.
- */
-static PyArray_Descr *
-ensure_dtype_nbo(PyArray_Descr *type)
-{
-    if (PyArray_ISNBO(type->byteorder)) {
-        Py_INCREF(type);
-        return type;
-    }
-    else {
-        return PyArray_DescrNewByteorder(type, NPY_NATIVE);
-    }
-}
 
 /*UFUNC_API
  *
diff --git a/numpy/core/tests/examples/checks.pyx b/numpy/core/tests/examples/checks.pyx
index ecf0ad3fa..151979db7 100644
--- a/numpy/core/tests/examples/checks.pyx
+++ b/numpy/core/tests/examples/checks.pyx
@@ -24,3 +24,7 @@ def get_td64_value(obj):
 
 def get_dt64_unit(obj):
     return cnp.get_datetime64_unit(obj)
+
+
+def is_integer(obj):
+    return isinstance(obj, (cnp.integer, int))
diff --git a/numpy/core/tests/test_cython.py b/numpy/core/tests/test_cython.py
index 63524b269..bfdb692d7 100644
--- a/numpy/core/tests/test_cython.py
+++ b/numpy/core/tests/test_cython.py
@@ -126,3 +126,11 @@ def test_get_datetime64_unit(install_temp):
     result = checks.get_dt64_unit(td64)
     expected = 5
     assert result == expected
+
+
+def test_abstract_scalars(install_temp):
+    import checks
+
+    assert checks.is_integer(1)
+    assert checks.is_integer(np.int8(1))
+    assert checks.is_integer(np.uint64(1))
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index ae5ee4c88..f5428f98c 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -941,7 +941,7 @@ class TestTypes:
             return
 
         res = np.promote_types(dtype, dtype)
-        if res.char in "?bhilqpBHILQPefdgFDGOmM":
+        if res.char in "?bhilqpBHILQPefdgFDGOmM" or dtype.type is rational:
             # Metadata is lost for simple promotions (they create a new dtype)
             assert res.metadata is None
         else:
@@ -976,41 +976,20 @@ class TestTypes:
             # Promotion failed, this test only checks metadata
             return
 
-        # The rules for when metadata is preserved and which dtypes metadta
-        # will be used are very confusing and depend on multiple paths.
-        # This long if statement attempts to reproduce this:
-        if dtype1.type is rational or dtype2.type is rational:
-            # User dtype promotion preserves byte-order here:
-            if np.can_cast(res, dtype1):
-                assert res.metadata == dtype1.metadata
-            else:
-                assert res.metadata == dtype2.metadata
-
-        elif res.char in "?bhilqpBHILQPefdgFDGOmM":
+        if res.char in "?bhilqpBHILQPefdgFDGOmM" or res.type is rational:
             # All simple types lose metadata (due to using promotion table):
             assert res.metadata is None
-        elif res.kind in "SU" and dtype1 == dtype2:
-            # Strings give precedence to the second dtype:
-            assert res is dtype2
         elif res == dtype1:
             # If one result is the result, it is usually returned unchanged:
             assert res is dtype1
         elif res == dtype2:
-            # If one result is the result, it is usually returned unchanged:
-            assert res is dtype2
-        elif dtype1.kind == "S" and dtype2.kind == "U":
-            # Promotion creates a new unicode dtype from scratch
-            assert res.metadata is None
-        elif dtype1.kind == "U" and dtype2.kind == "S":
-            # Promotion creates a new unicode dtype from scratch
-            assert res.metadata is None
-        elif res.kind in "SU" and dtype2.kind != res.kind:
-            # We build on top of dtype1:
-            assert res.metadata == dtype1.metadata
-        elif res.kind in "SU" and res.kind == dtype1.kind:
-            assert res.metadata == dtype1.metadata
-        elif res.kind in "SU" and res.kind == dtype2.kind:
-            assert res.metadata == dtype2.metadata
+            # dtype1 may have been cast to the same type/kind as dtype2.
+            # If the resulting dtype is identical we currently pick the cast
+            # version of dtype1, which lost the metadata:
+            if np.promote_types(dtype1, dtype2.kind) == dtype2:
+                assert res.metadata is None
+            else:
+                assert res.metadata == metadata2
         else:
             assert res.metadata is None
 
@@ -1025,6 +1004,24 @@ class TestTypes:
             assert res_bs == res
         assert res_bs.metadata == res.metadata
 
+    @pytest.mark.parametrize(["dtype1", "dtype2"],
+            [[np.dtype("V6"), np.dtype("V10")],
+             [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])],
+             [np.dtype("i8,i8"), np.dtype("i4,i4")],
+            ])
+    def test_invalid_void_promotion(self, dtype1, dtype2):
+        # Mainly test structured void promotion, which currently allows
+        # byte-swapping, but nothing else:
+        with pytest.raises(TypeError):
+            np.promote_types(dtype1, dtype2)
+
+    @pytest.mark.parametrize(["dtype1", "dtype2"],
+            [[np.dtype("V10"), np.dtype("V10")],
+             [np.dtype([("name1", "<i8")]), np.dtype([("name1", ">i8")])],
+             [np.dtype("i8,i8"), np.dtype("i8,>i8")],
+            ])
+    def test_valid_void_promotion(self, dtype1, dtype2):
+        assert np.promote_types(dtype1, dtype2) is dtype1
 
     def test_can_cast(self):
         assert_(np.can_cast(np.int32, np.int64))
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index 85dc2f1e8..72ea0c388 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -152,6 +152,18 @@ class _Config:
             By default(None), treated as True if the feature contains at
             least one applicable flag. see `feature_can_autovec()`
 
+        "extra_checks": str or list, optional
+            Extra test case names for the CPU feature that need to be tested
+            against the compiler.
+
+            Each test case must have a C file named ``extra_xxxx.c``, where
+            ``xxxx`` is the case name in lower case, under 'conf_check_path'.
+            It should contain at least one intrinsic or function related to the test case.
+
+            If the compiler is able to successfully compile the C file then `CCompilerOpt`
+            will add a C ``#define`` for it into the main dispatch header, e.g.
+            ``#define {conf_c_prefix}HAVE_XXXX 1`` where ``XXXX`` is the case name in upper case.
+
         **NOTES**:
             * space can be used as separator with options that supports "str or list"
             * case-sensitive for all values and feature name must be in upper-case.
@@ -230,7 +242,10 @@ class _Config:
         F16C   = dict(interest=11, implies="AVX"),
         FMA3   = dict(interest=12, implies="F16C"),
         AVX2   = dict(interest=13, implies="F16C"),
-        AVX512F = dict(interest=20, implies="FMA3 AVX2", implies_detect=False),
+        AVX512F = dict(
+            interest=20, implies="FMA3 AVX2", implies_detect=False,
+            extra_checks="AVX512F_REDUCE"
+        ),
         AVX512CD = dict(interest=21, implies="AVX512F"),
         AVX512_KNL = dict(
             interest=40, implies="AVX512CD", group="AVX512ER AVX512PF",
@@ -243,7 +258,8 @@ class _Config:
         ),
         AVX512_SKX = dict(
             interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ",
-            detect="AVX512_SKX", implies_detect=False
+            detect="AVX512_SKX", implies_detect=False,
+            extra_checks="AVX512BW_MASK"
         ),
         AVX512_CLX = dict(
             interest=43, implies="AVX512_SKX", group="AVX512VNNI",
@@ -673,7 +689,7 @@ class _Distutils:
         # intel and msvc compilers don't raise
         # fatal errors when flags are wrong or unsupported
         ".*("
-        "warning D9002|"  # msvc, it should be work with any language. 
+        "warning D9002|"  # msvc, it should be work with any language.
         "invalid argument for option" # intel
         ").*"
     )
@@ -1137,7 +1153,7 @@ class _Feature:
                 continue
             # list is used internally for these options
             for option in (
-                "implies", "group", "detect", "headers", "flags"
+                "implies", "group", "detect", "headers", "flags", "extra_checks"
             ) :
                 oval = feature.get(option)
                 if isinstance(oval, str):
@@ -1439,7 +1455,7 @@ class _Feature:
             self.conf_check_path, "cpu_%s.c" % name.lower()
         )
         if not os.path.exists(test_path):
-            self.dist_fatal("feature test file is not exist", path)
+            self.dist_fatal("feature test file is not exist", test_path)
 
         test = self.dist_test(test_path, force_flags + self.cc_flags["werror"])
         if not test:
@@ -1487,6 +1503,45 @@ class _Feature:
             can = valid_flags and any(valid_flags)
         return can
 
+    @_Cache.me
+    def feature_extra_checks(self, name):
+        """
+        Return a list of supported extra checks after testing them against
+        the compiler.
+
+        Parameters
+        ----------
+        name : str
+            CPU feature name in uppercase.
+        """
+        assert isinstance(name, str)
+        d = self.feature_supported[name]
+        extra_checks = d.get("extra_checks", [])
+        if not extra_checks:
+            return []
+
+        self.dist_log("Testing extra checks for feature '%s'" % name, extra_checks)
+        flags = self.feature_flags(name)
+        available = []
+        not_available = []
+        for chk in extra_checks:
+            test_path = os.path.join(
+                self.conf_check_path, "extra_%s.c" % chk.lower()
+            )
+            if not os.path.exists(test_path):
+                self.dist_fatal("extra check file does not exist", test_path)
+
+            is_supported = self.dist_test(test_path, flags + self.cc_flags["werror"])
+            if is_supported:
+                available.append(chk)
+            else:
+                not_available.append(chk)
+
+        if not_available:
+            self.dist_log("testing failed for checks", not_available, stderr=True)
+        return available
+
+
     def feature_c_preprocessor(self, feature_name, tabs=0):
         """
         Generate C preprocessor definitions and include headers of a CPU feature.
@@ -1520,14 +1575,18 @@ class _Feature:
         prepr += [
             "#include <%s>" % h for h in feature.get("headers", [])
         ]
-        group = feature.get("group", [])
-        for f in group:
-            # Guard features in case of duplicate definitions
+
+        # Build a new list: `+=` would extend the feature's stored "group" in place.
+        extra_defs = feature.get("group", []) + self.feature_extra_checks(feature_name)
+        for edef in extra_defs:
+            # Guard extra definitions in case of duplicate with
+            # another feature
             prepr += [
-                "#ifndef %sHAVE_%s" % (self.conf_c_prefix, f),
-                "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, f),
+                "#ifndef %sHAVE_%s" % (self.conf_c_prefix, edef),
+                "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, edef),
                 "#endif",
             ]
+
         if tabs > 0:
             prepr = [('\t'*tabs) + l for l in prepr]
         return '\n'.join(prepr)
@@ -2127,7 +2186,7 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
 
         See Also
         --------
-        parse_targets() :
+        parse_targets :
             Parsing the configuration statements of dispatch-able sources.
         """
         to_compile = {}
@@ -2269,6 +2328,12 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
         baseline_rows.append((
             "Flags", (' '.join(baseline_flags) if baseline_flags else "none")
         ))
+        extra_checks = []
+        for name in baseline_names:
+            extra_checks += self.feature_extra_checks(name)
+        baseline_rows.append((
+            "Extra checks", (' '.join(extra_checks) if extra_checks else "none")
+        ))
 
         ########## dispatch ##########
         if self.cc_noopt:
@@ -2307,14 +2372,21 @@ class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse):
         else:
             dispatch_rows.append(("Generated", ''))
             for tar in self.feature_sorted(target_sources):
+                tar_as_seq = [tar] if isinstance(tar, str) else tar
                 sources = target_sources[tar]
                 name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar)
                 flags = ' '.join(self.feature_flags(tar))
                 implies = ' '.join(self.feature_sorted(self.feature_implies(tar)))
                 detect = ' '.join(self.feature_detect(tar))
+                extra_checks = []
+                for tar_name in tar_as_seq:  # must not rebind `name` (the row label)
+                    extra_checks += self.feature_extra_checks(tar_name)
+                extra_checks = (' '.join(extra_checks) if extra_checks else "none")
+
                 dispatch_rows.append(('', ''))
                 dispatch_rows.append((name, implies))
                 dispatch_rows.append(("Flags", flags))
+                dispatch_rows.append(("Extra checks", extra_checks))
                 dispatch_rows.append(("Detect", detect))
                 for src in sources:
                     dispatch_rows.append(("", src))
diff --git a/numpy/distutils/checks/extra_avx512bw_mask.c b/numpy/distutils/checks/extra_avx512bw_mask.c
new file mode 100644
index 000000000..9cfd0c2a5
--- /dev/null
+++ b/numpy/distutils/checks/extra_avx512bw_mask.c
@@ -0,0 +1,18 @@
+#include <immintrin.h>
+/**
+ * Test the AVX512BW mask operations, since compiler support for them varies:
+ *  - MSVC has supported them since VS2019, see
+ *    https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
+ *  - Clang >= v8.0
+ *  - GCC >= v7.1
+ */
+int main(void)
+{
+    __mmask64 m64 = _mm512_cmpeq_epi8_mask(_mm512_set1_epi8((char)1), _mm512_set1_epi8((char)1));
+    m64 = _kor_mask64(m64, m64);
+    m64 = _kxor_mask64(m64, m64);
+    m64 = _cvtu64_mask64(_cvtmask64_u64(m64));
+    m64 = _mm512_kunpackd(m64, m64);
+    m64 = (__mmask64)_mm512_kunpackw((__mmask32)m64, (__mmask32)m64);
+    return (int)_cvtmask64_u64(m64);
+}
diff --git a/numpy/distutils/checks/extra_avx512f_reduce.c b/numpy/distutils/checks/extra_avx512f_reduce.c
new file mode 100644
index 000000000..f979d504e
--- /dev/null
+++ b/numpy/distutils/checks/extra_avx512f_reduce.c
@@ -0,0 +1,41 @@
+#include <immintrin.h>
+/**
+ * The following intrinsics don't have direct native support but compilers
+ * tend to emulate them.
+ * They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19
+ */
+int main(void)
+{
+    __m512  one_ps = _mm512_set1_ps(1.0f);
+    __m512d one_pd = _mm512_set1_pd(1.0);
+    __m512i one_i64 = _mm512_set1_epi64(1); // integer literal: the argument is a 64-bit integer
+    // add
+    float sum_ps  = _mm512_reduce_add_ps(one_ps);
+    double sum_pd = _mm512_reduce_add_pd(one_pd);
+    int sum_int   = (int)_mm512_reduce_add_epi64(one_i64);
+        sum_int  += (int)_mm512_reduce_add_epi32(one_i64);
+    // mul
+    sum_ps  += _mm512_reduce_mul_ps(one_ps);
+    sum_pd  += _mm512_reduce_mul_pd(one_pd);
+    sum_int += (int)_mm512_reduce_mul_epi64(one_i64);
+    sum_int += (int)_mm512_reduce_mul_epi32(one_i64);
+    // min
+    sum_ps  += _mm512_reduce_min_ps(one_ps);
+    sum_pd  += _mm512_reduce_min_pd(one_pd);
+    sum_int += (int)_mm512_reduce_min_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_min_epu32(one_i64);
+    sum_int += (int)_mm512_reduce_min_epi64(one_i64);
+    // max
+    sum_ps  += _mm512_reduce_max_ps(one_ps);
+    sum_pd  += _mm512_reduce_max_pd(one_pd);
+    sum_int += (int)_mm512_reduce_max_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_max_epu32(one_i64);
+    sum_int += (int)_mm512_reduce_max_epi64(one_i64);
+    // and
+    sum_int += (int)_mm512_reduce_and_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_and_epi64(one_i64);
+    // or
+    sum_int += (int)_mm512_reduce_or_epi32(one_i64);
+    sum_int += (int)_mm512_reduce_or_epi64(one_i64);
+    return (int)sum_ps + (int)sum_pd + sum_int;
+}
diff --git a/numpy/distutils/system_info.py b/numpy/distutils/system_info.py
index 19f7482f2..c3bd6347c 100644
--- a/numpy/distutils/system_info.py
+++ b/numpy/distutils/system_info.py
@@ -415,6 +415,89 @@ def get_standard_file(fname):
     return filenames
 
 
+def _parse_env_order(base_order, env):
+    """ Parse an environment variable `env` by splitting with "," and only returning elements from `base_order`
+
+    This method will sequence the environment variable and check for their individual elements in `base_order`.
+    Items are compared case-insensitively; whitespace around items and
+    empty items are ignored.
+
+    The items in the environment variable may be negated via '^item' or '!itema,itemb'.
+    It must start with ^/! to negate all options.
+
+    Parameters
+    ----------
+    base_order : list of str
+       the base list of orders
+    env : str
+       the environment variable to be parsed, if none is found, `base_order` is returned
+
+    Returns
+    -------
+    allow_order : list of str
+        allowed orders in lower-case; they follow the order given in `env`, or the
+        order of `base_order` when `env` is negated or not set
+    unknown_order : list of str
+        for values not overlapping with `base_order`
+
+    Raises
+    ------
+    ValueError
+        for mixed negated and non-negated orders or multiple negated orders
+
+    Examples
+    --------
+    With ``base_order = ['a', 'b', 'c']`` the value of `env` maps to:
+
+    - ``'c,a'`` -> ``(['c', 'a'], [])``
+    - ``'^b'``  -> ``(['a', 'c'], [])``
+    - ``'b,x'`` -> ``(['b'], ['x'])``
+    - ``''``    -> ``([], [])``
+    """
+    order_str = os.environ.get(env, None)
+
+    # ensure all base-orders are lower-case (for easier comparison)
+    base_order = [order.lower() for order in base_order]
+    if order_str is None:
+        return base_order, []
+
+    # tolerate surrounding whitespace so that " ^a,b" is still a negation
+    order_str = order_str.strip()
+    neg = order_str.startswith(('^', '!'))
+    # Check format: at most one negation character, and only as a prefix
+    sum_neg = order_str.count('^') + order_str.count('!')
+    if neg:
+        if sum_neg > 1:
+            raise ValueError(f"Environment variable '{env}' may only contain a single (prefixed) negation: {order_str}")
+        # remove prefix
+        order_str = order_str[1:]
+    elif sum_neg > 0:
+        raise ValueError(f"Environment variable '{env}' may not mix negated and non-negated items: {order_str}")
+
+    # Split, lower case and drop empty items (an empty value selects nothing)
+    orders = [order.strip() for order in order_str.lower().split(',')]
+    orders = [order for order in orders if order]
+
+    # to inform the caller about non-overlapping elements
+    unknown_order = []
+    # items named in `env` that are part of `base_order`, without repeats
+    known_order = []
+    for order in orders:
+        if order not in base_order:
+            unknown_order.append(order)
+        elif order not in known_order:
+            known_order.append(order)
+
+    if neg:
+        # negated: keep the base ordering minus the named items
+        allow_order = [order for order in base_order if order not in known_order]
+    else:
+        # non-negated: the ordering given by the user wins
+        allow_order = known_order
+
+    return allow_order, unknown_order
+
+
 def get_info(name, notfound_action=0):
     """
     notfound_action:
@@ -1766,24 +1849,11 @@ class lapack_opt_info(system_info):
         return getattr(self, '_calc_info_{}'.format(name))()
 
     def calc_info(self):
-        user_order = os.environ.get(self.order_env_var_name, None)
-        if user_order is None:
-            lapack_order = self.lapack_order
-        else:
-            # the user has requested the order of the
-            # check they are all in the available list, a COMMA SEPARATED list
-            user_order = user_order.lower().split(',')
-            non_existing = []
-            lapack_order = []
-            for order in user_order:
-                if order in self.lapack_order:
-                    lapack_order.append(order)
-                elif len(order) > 0:
-                    non_existing.append(order)
-            if len(non_existing) > 0:
-                raise ValueError("lapack_opt_info user defined "
-                                 "LAPACK order has unacceptable "
-                                 "values: {}".format(non_existing))
+        lapack_order, unknown_order = _parse_env_order(self.lapack_order, self.order_env_var_name)
+        if len(unknown_order) > 0:
+            raise ValueError("lapack_opt_info user defined "
+                             "LAPACK order has unacceptable "
+                             "values: {}".format(unknown_order))
 
         for lapack in lapack_order:
             if self._calc_info(lapack):
@@ -1911,22 +1981,9 @@ class blas_opt_info(system_info):
         return getattr(self, '_calc_info_{}'.format(name))()
 
     def calc_info(self):
-        user_order = os.environ.get(self.order_env_var_name, None)
-        if user_order is None:
-            blas_order = self.blas_order
-        else:
-            # the user has requested the order of the
-            # check they are all in the available list
-            user_order = user_order.lower().split(',')
-            non_existing = []
-            blas_order = []
-            for order in user_order:
-                if order in self.blas_order:
-                    blas_order.append(order)
-                elif len(order) > 0:
-                    non_existing.append(order)
-            if len(non_existing) > 0:
-                raise ValueError("blas_opt_info user defined BLAS order has unacceptable values: {}".format(non_existing))
+        blas_order, unknown_order = _parse_env_order(self.blas_order, self.order_env_var_name)
+        if len(unknown_order) > 0:
+            raise ValueError("blas_opt_info user defined BLAS order has unacceptable values: {}".format(unknown_order))
 
         for blas in blas_order:
             if self._calc_info(blas):
diff --git a/numpy/distutils/tests/test_ccompiler_opt_conf.py b/numpy/distutils/tests/test_ccompiler_opt_conf.py
index 2f83a59e0..244748e58 100644
--- a/numpy/distutils/tests/test_ccompiler_opt_conf.py
+++ b/numpy/distutils/tests/test_ccompiler_opt_conf.py
@@ -66,11 +66,12 @@ class _TestConfFeatures(FakeCCompilerOpt):
 
         self.test_implies(error_msg, search_in, feature_name, feature_dict)
         self.test_group(error_msg, search_in, feature_name, feature_dict)
+        self.test_extra_checks(error_msg, search_in, feature_name, feature_dict)
 
     def test_option_types(self, error_msg, option, val):
         for tp, available in (
             ((str, list), (
-                "implies", "headers", "flags", "group", "detect"
+                "implies", "headers", "flags", "group", "detect", "extra_checks"
             )),
             ((str,),  ("disable",)),
             ((int,),  ("interest",)),
@@ -83,29 +84,25 @@ class _TestConfFeatures(FakeCCompilerOpt):
             if not isinstance(val, tp):
                 error_tp = [t.__name__ for t in (*tp,)]
                 error_tp = ' or '.join(error_tp)
-                raise AssertionError(error_msg + \
+                raise AssertionError(error_msg +
                     "expected '%s' type for option '%s' not '%s'" % (
                      error_tp, option, type(val).__name__
                 ))
             break
 
         if not found_it:
-            raise AssertionError(error_msg + \
-                "invalid option name '%s'" % option
-            )
+            raise AssertionError(error_msg + "invalid option name '%s'" % option)
 
     def test_duplicates(self, error_msg, option, val):
         if option not in (
-            "implies", "headers", "flags", "group", "detect"
+            "implies", "headers", "flags", "group", "detect", "extra_checks"
         ) : return
 
         if isinstance(val, str):
             val = val.split()
 
         if len(val) != len(set(val)):
-            raise AssertionError(error_msg + \
-                "duplicated values in option '%s'" % option
-            )
+            raise AssertionError(error_msg + "duplicated values in option '%s'" % option)
 
     def test_implies(self, error_msg, search_in, feature_name, feature_dict):
         if feature_dict.get("disabled") is not None:
@@ -117,21 +114,15 @@ class _TestConfFeatures(FakeCCompilerOpt):
             implies = implies.split()
 
         if feature_name in implies:
-            raise AssertionError(error_msg + \
-                "feature implies itself"
-            )
+            raise AssertionError(error_msg + "feature implies itself")
 
         for impl in implies:
             impl_dict = search_in.get(impl)
             if impl_dict is not None:
                 if "disable" in impl_dict:
-                    raise AssertionError(error_msg + \
-                        "implies disabled feature '%s'" % impl
-                    )
+                    raise AssertionError(error_msg + "implies disabled feature '%s'" % impl)
                 continue
-            raise AssertionError(error_msg + \
-                "implies non-exist feature '%s'" % impl
-            )
+            raise AssertionError(error_msg + "implies non-exist feature '%s'" % impl)
 
     def test_group(self, error_msg, search_in, feature_name, feature_dict):
         if feature_dict.get("disabled") is not None:
@@ -146,10 +137,26 @@ class _TestConfFeatures(FakeCCompilerOpt):
             impl_dict = search_in.get(f)
             if not impl_dict or "disable" in impl_dict:
                 continue
-            raise AssertionError(error_msg + \
-                "in option '%s', '%s' already exists as a feature name" % (
-                option, f
-            ))
+            raise AssertionError(error_msg +
+                "in option 'group', '%s' already exists as a feature name" % f
+            )
+
+    def test_extra_checks(self, error_msg, search_in, feature_name, feature_dict):
+        """Fail if an `extra_checks` entry reuses the name of an enabled feature."""
+        # the option key is spelled "disable"; disabled features are not validated
+        if "disable" in feature_dict:
+            return
+        extra_checks = feature_dict.get("extra_checks") or ""
+        if isinstance(extra_checks, str):
+            extra_checks = extra_checks.split()
+
+        for f in extra_checks:
+            impl_dict = search_in.get(f)
+            # unknown or disabled names cannot clash with a feature
+            if not impl_dict or "disable" in impl_dict:
+                continue
+            raise AssertionError(error_msg +
+                "in option 'extra_checks', extra test case '%s' already exists as a feature name" % f)
 
 class TestConfFeatures(unittest.TestCase):
     def __init__(self, methodName="runTest"):
diff --git a/numpy/distutils/tests/test_system_info.py b/numpy/distutils/tests/test_system_info.py
index 0768ffdde..ec15126f7 100644
--- a/numpy/distutils/tests/test_system_info.py
+++ b/numpy/distutils/tests/test_system_info.py
@@ -284,4 +284,37 @@ class TestSystemInfoReading:
             assert info.get_lib_dirs() == lib_dirs
         finally:
             os.chdir(previousDir)
-        
+
+
+def test_distutils_parse_env_order(monkeypatch):
+    """Check plain, empty, negated and malformed order strings."""
+    from numpy.distutils.system_info import _parse_env_order
+    env = 'NPY_TESTS_DISTUTILS_PARSE_ENV_ORDER'
+    base_order = list('abcdef')
+
+    # plain list: known items keep the given order, 'i' is reported as unknown
+    monkeypatch.setenv(env, 'b,i,e,f')
+    allowed, unknown = _parse_env_order(base_order, env)
+    assert len(allowed) == 3
+    assert allowed == list('bef')
+    assert len(unknown) == 1
+
+    # For when LAPACK/BLAS optimization is disabled
+    monkeypatch.setenv(env, '')
+    allowed, unknown = _parse_env_order(base_order, env)
+    assert len(allowed) == 0
+    assert len(unknown) == 0
+
+    # negation: the listed items are removed from the base order
+    for prefix in ('^', '!'):
+        monkeypatch.setenv(env, prefix + 'b,i,e')
+        allowed, unknown = _parse_env_order(base_order, env)
+        assert len(allowed) == 4
+        assert allowed == list('acdf')
+        assert len(unknown) == 1
+
+    # a negation that is not the prefix, or a second negation, is rejected
+    for malformed in ('b,^e,i', '!b,^e,i'):
+        monkeypatch.setenv(env, malformed)
+        with pytest.raises(ValueError):
+            _parse_env_order(base_order, env)
diff --git a/numpy/doc/constants.py b/numpy/doc/constants.py
index 2c629ad33..128493d90 100644
--- a/numpy/doc/constants.py
+++ b/numpy/doc/constants.py
@@ -135,10 +135,6 @@ add_newdoc('numpy', 'newaxis',
     """
     A convenient alias for None, useful for indexing arrays.
 
-    See Also
-    --------
-    `numpy.doc.indexing`
-
     Examples
     --------
     >>> newaxis is None
diff --git a/numpy/f2py/cfuncs.py b/numpy/f2py/cfuncs.py
index 9f5c73a45..26b43e7e6 100644
--- a/numpy/f2py/cfuncs.py
+++ b/numpy/f2py/cfuncs.py
@@ -286,11 +286,11 @@ static int f2py_size(PyArrayObject* var, ...)
 """
 
 cppmacros[
-    'pyobj_from_char1'] = '#define pyobj_from_char1(v) (PyInt_FromLong(v))'
+    'pyobj_from_char1'] = '#define pyobj_from_char1(v) (PyLong_FromLong(v))'
 cppmacros[
-    'pyobj_from_short1'] = '#define pyobj_from_short1(v) (PyInt_FromLong(v))'
+    'pyobj_from_short1'] = '#define pyobj_from_short1(v) (PyLong_FromLong(v))'
 needs['pyobj_from_int1'] = ['signed_char']
-cppmacros['pyobj_from_int1'] = '#define pyobj_from_int1(v) (PyInt_FromLong(v))'
+cppmacros['pyobj_from_int1'] = '#define pyobj_from_int1(v) (PyLong_FromLong(v))'
 cppmacros[
     'pyobj_from_long1'] = '#define pyobj_from_long1(v) (PyLong_FromLong(v))'
 needs['pyobj_from_long_long1'] = ['long_long']
@@ -436,9 +436,9 @@ cppmacros['GETSTRFROMPYTUPLE'] = """\
         PyObject *rv_cb_str = PyTuple_GetItem((tuple),(index));\\
         if (rv_cb_str == NULL)\\
             goto capi_fail;\\
-        if (PyString_Check(rv_cb_str)) {\\
+        if (PyBytes_Check(rv_cb_str)) {\\
             str[len-1]='\\0';\\
-            STRINGCOPYN((str),PyString_AS_STRING((PyStringObject*)rv_cb_str),(len));\\
+            STRINGCOPYN((str),PyBytes_AS_STRING((PyBytesObject*)rv_cb_str),(len));\\
         } else {\\
             PRINTPYOBJERR(rv_cb_str);\\
             PyErr_SetString(#modulename#_error,\"string object expected\");\\
@@ -629,7 +629,9 @@ capi_fail:
 """
 needs['string_from_pyobj'] = ['string', 'STRINGMALLOC', 'STRINGCOPYN']
 cfuncs['string_from_pyobj'] = """\
-static int string_from_pyobj(string *str,int *len,const string inistr,PyObject *obj,const char *errmess) {
+static int
+string_from_pyobj(string *str,int *len,const string inistr,PyObject *obj,const char *errmess)
+{
     PyArrayObject *arr = NULL;
     PyObject *tmp = NULL;
 #ifdef DEBUGCFUNCS
@@ -655,7 +657,7 @@ fprintf(stderr,\"string_from_pyobj(str='%s',len=%d,inistr='%s',obj=%p)\\n\",(cha
         STRINGCOPYN(*str,PyArray_DATA(arr),*len+1);
         return 1;
     }
-    if (PyString_Check(obj)) {
+    if (PyBytes_Check(obj)) {
         tmp = obj;
         Py_INCREF(tmp);
     }
@@ -675,200 +677,244 @@ fprintf(stderr,\"string_from_pyobj(str='%s',len=%d,inistr='%s',obj=%p)\\n\",(cha
     }
     if (tmp == NULL) goto capi_fail;
     if (*len == -1)
-        *len = PyString_GET_SIZE(tmp);
+        *len = PyBytes_GET_SIZE(tmp);
     STRINGMALLOC(*str,*len);
-    STRINGCOPYN(*str,PyString_AS_STRING(tmp),*len+1);
+    STRINGCOPYN(*str,PyBytes_AS_STRING(tmp),*len+1);
     Py_DECREF(tmp);
     return 1;
 capi_fail:
     Py_XDECREF(tmp);
     {
         PyObject* err = PyErr_Occurred();
-        if (err==NULL) err = #modulename#_error;
-        PyErr_SetString(err,errmess);
+        if (err == NULL) {
+            err = #modulename#_error;
+        }
+        PyErr_SetString(err, errmess);
     }
     return 0;
 }
 """
+
+
 needs['char_from_pyobj'] = ['int_from_pyobj']
 cfuncs['char_from_pyobj'] = """\
-static int char_from_pyobj(char* v,PyObject *obj,const char *errmess) {
-    int i=0;
-    if (int_from_pyobj(&i,obj,errmess)) {
+static int
+char_from_pyobj(char* v, PyObject *obj, const char *errmess) {
+    int i = 0;
+    if (int_from_pyobj(&i, obj, errmess)) {
         *v = (char)i;
         return 1;
     }
     return 0;
 }
 """
+
+
 needs['signed_char_from_pyobj'] = ['int_from_pyobj', 'signed_char']
 cfuncs['signed_char_from_pyobj'] = """\
-static int signed_char_from_pyobj(signed_char* v,PyObject *obj,const char *errmess) {
-    int i=0;
-    if (int_from_pyobj(&i,obj,errmess)) {
+static int
+signed_char_from_pyobj(signed_char* v, PyObject *obj, const char *errmess) {
+    int i = 0;
+    if (int_from_pyobj(&i, obj, errmess)) {
         *v = (signed_char)i;
         return 1;
     }
     return 0;
 }
 """
+
+
 needs['short_from_pyobj'] = ['int_from_pyobj']
 cfuncs['short_from_pyobj'] = """\
-static int short_from_pyobj(short* v,PyObject *obj,const char *errmess) {
-    int i=0;
-    if (int_from_pyobj(&i,obj,errmess)) {
+static int
+short_from_pyobj(short* v, PyObject *obj, const char *errmess) {
+    int i = 0;
+    if (int_from_pyobj(&i, obj, errmess)) {
         *v = (short)i;
         return 1;
     }
     return 0;
 }
 """
+
+
 cfuncs['int_from_pyobj'] = """\
-static int int_from_pyobj(int* v,PyObject *obj,const char *errmess) {
+static int
+int_from_pyobj(int* v, PyObject *obj, const char *errmess)
+{
     PyObject* tmp = NULL;
-    if (PyInt_Check(obj)) {
-        *v = (int)PyInt_AS_LONG(obj);
-        return 1;
+
+    if (PyLong_Check(obj)) {
+        *v = Npy__PyLong_AsInt(obj);
+        return !(*v == -1 && PyErr_Occurred());
     }
-    tmp = PyNumber_Int(obj);
+
+    tmp = PyNumber_Long(obj);
     if (tmp) {
-        *v = PyInt_AS_LONG(tmp);
+        *v = Npy__PyLong_AsInt(tmp);
         Py_DECREF(tmp);
-        return 1;
+        return !(*v == -1 && PyErr_Occurred());
     }
+
     if (PyComplex_Check(obj))
         tmp = PyObject_GetAttrString(obj,\"real\");
-    else if (PyString_Check(obj) || PyUnicode_Check(obj))
+    else if (PyBytes_Check(obj) || PyUnicode_Check(obj))
         /*pass*/;
     else if (PySequence_Check(obj))
-        tmp = PySequence_GetItem(obj,0);
+        tmp = PySequence_GetItem(obj, 0);
     if (tmp) {
         PyErr_Clear();
-        if (int_from_pyobj(v,tmp,errmess)) {Py_DECREF(tmp); return 1;}
+        if (int_from_pyobj(v, tmp, errmess)) {
+            Py_DECREF(tmp);
+            return 1;
+        }
         Py_DECREF(tmp);
     }
     {
         PyObject* err = PyErr_Occurred();
-        if (err==NULL) err = #modulename#_error;
-        PyErr_SetString(err,errmess);
+        if (err == NULL) {
+            err = #modulename#_error;
+        }
+        PyErr_SetString(err, errmess);
     }
     return 0;
 }
 """
+
+
 cfuncs['long_from_pyobj'] = """\
-static int long_from_pyobj(long* v,PyObject *obj,const char *errmess) {
+static int
+long_from_pyobj(long* v, PyObject *obj, const char *errmess) {
     PyObject* tmp = NULL;
-    if (PyInt_Check(obj)) {
-        *v = PyInt_AS_LONG(obj);
-        return 1;
+
+    if (PyLong_Check(obj)) {
+        *v = PyLong_AsLong(obj);
+        return !(*v == -1 && PyErr_Occurred());
     }
-    tmp = PyNumber_Int(obj);
+
+    tmp = PyNumber_Long(obj);
     if (tmp) {
-        *v = PyInt_AS_LONG(tmp);
+        *v = PyLong_AsLong(tmp);
         Py_DECREF(tmp);
-        return 1;
+        return !(*v == -1 && PyErr_Occurred());
     }
+
     if (PyComplex_Check(obj))
         tmp = PyObject_GetAttrString(obj,\"real\");
-    else if (PyString_Check(obj) || PyUnicode_Check(obj))
+    else if (PyBytes_Check(obj) || PyUnicode_Check(obj))
         /*pass*/;
     else if (PySequence_Check(obj))
         tmp = PySequence_GetItem(obj,0);
+
     if (tmp) {
         PyErr_Clear();
-        if (long_from_pyobj(v,tmp,errmess)) {Py_DECREF(tmp); return 1;}
+        if (long_from_pyobj(v, tmp, errmess)) {
+            Py_DECREF(tmp);
+            return 1;
+        }
         Py_DECREF(tmp);
     }
     {
         PyObject* err = PyErr_Occurred();
-        if (err==NULL) err = #modulename#_error;
-        PyErr_SetString(err,errmess);
+        if (err == NULL) {
+            err = #modulename#_error;
+        }
+        PyErr_SetString(err, errmess);
     }
     return 0;
 }
 """
+
+
 needs['long_long_from_pyobj'] = ['long_long']
 cfuncs['long_long_from_pyobj'] = """\
-static int long_long_from_pyobj(long_long* v,PyObject *obj,const char *errmess) {
+static int
+long_long_from_pyobj(long_long* v, PyObject *obj, const char *errmess)
+{
     PyObject* tmp = NULL;
+
     if (PyLong_Check(obj)) {
         *v = PyLong_AsLongLong(obj);
-        return (!PyErr_Occurred());
-    }
-    if (PyInt_Check(obj)) {
-        *v = (long_long)PyInt_AS_LONG(obj);
-        return 1;
+        return !(*v == -1 && PyErr_Occurred());
     }
+
     tmp = PyNumber_Long(obj);
     if (tmp) {
         *v = PyLong_AsLongLong(tmp);
         Py_DECREF(tmp);
-        return (!PyErr_Occurred());
+        return !(*v == -1 && PyErr_Occurred());
     }
+
     if (PyComplex_Check(obj))
         tmp = PyObject_GetAttrString(obj,\"real\");
-    else if (PyString_Check(obj) || PyUnicode_Check(obj))
+    else if (PyBytes_Check(obj) || PyUnicode_Check(obj))
         /*pass*/;
     else if (PySequence_Check(obj))
         tmp = PySequence_GetItem(obj,0);
     if (tmp) {
         PyErr_Clear();
-        if (long_long_from_pyobj(v,tmp,errmess)) {Py_DECREF(tmp); return 1;}
+        if (long_long_from_pyobj(v, tmp, errmess)) {
+            Py_DECREF(tmp);
+            return 1;
+        }
         Py_DECREF(tmp);
     }
     {
         PyObject* err = PyErr_Occurred();
-        if (err==NULL) err = #modulename#_error;
+        if (err == NULL) {
+            err = #modulename#_error;
+        }
         PyErr_SetString(err,errmess);
     }
     return 0;
 }
 """
+
+
 needs['long_double_from_pyobj'] = ['double_from_pyobj', 'long_double']
 cfuncs['long_double_from_pyobj'] = """\
-static int long_double_from_pyobj(long_double* v,PyObject *obj,const char *errmess) {
+static int
+long_double_from_pyobj(long_double* v, PyObject *obj, const char *errmess)
+{
     double d=0;
     if (PyArray_CheckScalar(obj)){
         if PyArray_IsScalar(obj, LongDouble) {
             PyArray_ScalarAsCtype(obj, v);
             return 1;
         }
-        else if (PyArray_Check(obj) && PyArray_TYPE(obj)==NPY_LONGDOUBLE) {
+        else if (PyArray_Check(obj) && PyArray_TYPE(obj) == NPY_LONGDOUBLE) {
             (*v) = *((npy_longdouble *)PyArray_DATA(obj));
             return 1;
         }
     }
-    if (double_from_pyobj(&d,obj,errmess)) {
+    if (double_from_pyobj(&d, obj, errmess)) {
         *v = (long_double)d;
         return 1;
     }
     return 0;
 }
 """
+
+
 cfuncs['double_from_pyobj'] = """\
-static int double_from_pyobj(double* v,PyObject *obj,const char *errmess) {
+static int
+double_from_pyobj(double* v, PyObject *obj, const char *errmess)
+{
     PyObject* tmp = NULL;
     if (PyFloat_Check(obj)) {
-#ifdef __sgi
         *v = PyFloat_AsDouble(obj);
-#else
-        *v = PyFloat_AS_DOUBLE(obj);
-#endif
-        return 1;
+        return !(*v == -1.0 && PyErr_Occurred());
     }
+
     tmp = PyNumber_Float(obj);
     if (tmp) {
-#ifdef __sgi
         *v = PyFloat_AsDouble(tmp);
-#else
-        *v = PyFloat_AS_DOUBLE(tmp);
-#endif
         Py_DECREF(tmp);
-        return 1;
+        return !(*v == -1.0 && PyErr_Occurred());
     }
     if (PyComplex_Check(obj))
         tmp = PyObject_GetAttrString(obj,\"real\");
-    else if (PyString_Check(obj) || PyUnicode_Check(obj))
+    else if (PyBytes_Check(obj) || PyUnicode_Check(obj))
         /*pass*/;
     else if (PySequence_Check(obj))
         tmp = PySequence_GetItem(obj,0);
@@ -885,9 +931,13 @@ static int double_from_pyobj(double* v,PyObject *obj,const char *errmess) {
     return 0;
 }
 """
+
+
 needs['float_from_pyobj'] = ['double_from_pyobj']
 cfuncs['float_from_pyobj'] = """\
-static int float_from_pyobj(float* v,PyObject *obj,const char *errmess) {
+static int
+float_from_pyobj(float* v, PyObject *obj, const char *errmess)
+{
     double d=0.0;
     if (double_from_pyobj(&d,obj,errmess)) {
         *v = (float)d;
@@ -896,11 +946,15 @@ static int float_from_pyobj(float* v,PyObject *obj,const char *errmess) {
     return 0;
 }
 """
+
+
 needs['complex_long_double_from_pyobj'] = ['complex_long_double', 'long_double',
                                            'complex_double_from_pyobj']
 cfuncs['complex_long_double_from_pyobj'] = """\
-static int complex_long_double_from_pyobj(complex_long_double* v,PyObject *obj,const char *errmess) {
-    complex_double cd={0.0,0.0};
+static int
+complex_long_double_from_pyobj(complex_long_double* v, PyObject *obj, const char *errmess)
+{
+    complex_double cd = {0.0,0.0};
     if (PyArray_CheckScalar(obj)){
         if PyArray_IsScalar(obj, CLongDouble) {
             PyArray_ScalarAsCtype(obj, v);
@@ -920,13 +974,17 @@ static int complex_long_double_from_pyobj(complex_long_double* v,PyObject *obj,c
     return 0;
 }
 """
+
+
 needs['complex_double_from_pyobj'] = ['complex_double']
 cfuncs['complex_double_from_pyobj'] = """\
-static int complex_double_from_pyobj(complex_double* v,PyObject *obj,const char *errmess) {
+static int
+complex_double_from_pyobj(complex_double* v, PyObject *obj, const char *errmess) {
     Py_complex c;
     if (PyComplex_Check(obj)) {
-        c=PyComplex_AsCComplex(obj);
-        (*v).r=c.real, (*v).i=c.imag;
+        c = PyComplex_AsCComplex(obj);
+        (*v).r = c.real;
+        (*v).i = c.imag;
         return 1;
     }
     if (PyArray_IsScalar(obj, ComplexFloating)) {
@@ -955,30 +1013,24 @@ static int complex_double_from_pyobj(complex_double* v,PyObject *obj,const char
         else {
             arr = PyArray_FromScalar(obj, PyArray_DescrFromType(NPY_CDOUBLE));
         }
-        if (arr==NULL) return 0;
+        if (arr == NULL) {
+            return 0;
+        }
         (*v).r = ((npy_cdouble *)PyArray_DATA(arr))->real;
         (*v).i = ((npy_cdouble *)PyArray_DATA(arr))->imag;
         return 1;
     }
     /* Python does not provide PyNumber_Complex function :-( */
-    (*v).i=0.0;
+    (*v).i = 0.0;
     if (PyFloat_Check(obj)) {
-#ifdef __sgi
         (*v).r = PyFloat_AsDouble(obj);
-#else
-        (*v).r = PyFloat_AS_DOUBLE(obj);
-#endif
-        return 1;
-    }
-    if (PyInt_Check(obj)) {
-        (*v).r = (double)PyInt_AS_LONG(obj);
-        return 1;
+        return !((*v).r == -1.0 && PyErr_Occurred());
     }
     if (PyLong_Check(obj)) {
         (*v).r = PyLong_AsDouble(obj);
-        return (!PyErr_Occurred());
+        return !((*v).r == -1.0 && PyErr_Occurred());
     }
-    if (PySequence_Check(obj) && !(PyString_Check(obj) || PyUnicode_Check(obj))) {
+    if (PySequence_Check(obj) && !(PyBytes_Check(obj) || PyUnicode_Check(obj))) {
         PyObject *tmp = PySequence_GetItem(obj,0);
         if (tmp) {
             if (complex_double_from_pyobj(v,tmp,errmess)) {
@@ -997,10 +1049,14 @@ static int complex_double_from_pyobj(complex_double* v,PyObject *obj,const char
     return 0;
 }
 """
+
+
 needs['complex_float_from_pyobj'] = [
     'complex_float', 'complex_double_from_pyobj']
 cfuncs['complex_float_from_pyobj'] = """\
-static int complex_float_from_pyobj(complex_float* v,PyObject *obj,const char *errmess) {
+static int
+complex_float_from_pyobj(complex_float* v,PyObject *obj,const char *errmess)
+{
     complex_double cd={0.0,0.0};
     if (complex_double_from_pyobj(&cd,obj,errmess)) {
         (*v).r = (float)cd.r;
@@ -1010,6 +1066,8 @@ static int complex_float_from_pyobj(complex_float* v,PyObject *obj,const char *e
     return 0;
 }
 """
+
+
 needs['try_pyarr_from_char'] = ['pyobj_from_char1', 'TRYPYARRAYTEMPLATE']
 cfuncs[
     'try_pyarr_from_char'] = 'static int try_pyarr_from_char(PyObject* obj,char* v) {\n    TRYPYARRAYTEMPLATE(char,\'c\');\n}\n'
@@ -1047,14 +1105,18 @@ needs['try_pyarr_from_complex_double'] = [
 cfuncs[
     'try_pyarr_from_complex_double'] = 'static int try_pyarr_from_complex_double(PyObject* obj,complex_double* v) {\n    TRYCOMPLEXPYARRAYTEMPLATE(double,\'D\');\n}\n'
 
-needs['create_cb_arglist'] = ['CFUNCSMESS', 'PRINTPYOBJERR', 'MINMAX']
 
+needs['create_cb_arglist'] = ['CFUNCSMESS', 'PRINTPYOBJERR', 'MINMAX']
 # create the list of arguments to be used when calling back to python
 cfuncs['create_cb_arglist'] = """\
-static int create_cb_arglist(PyObject* fun,PyTupleObject* xa,const int maxnofargs,const int nofoptargs,int *nofargs,PyTupleObject **args,const char *errmess) {
+static int
+create_cb_arglist(PyObject* fun, PyTupleObject* xa , const int maxnofargs,
+                  const int nofoptargs, int *nofargs, PyTupleObject **args,
+                  const char *errmess)
+{
     PyObject *tmp = NULL;
     PyObject *tmp_fun = NULL;
-    int tot,opt,ext,siz,i,di=0;
+    Py_ssize_t tot, opt, ext, siz, i, di = 0;
     CFUNCSMESS(\"create_cb_arglist\\n\");
     tot=opt=ext=siz=0;
     /* Get the total number of arguments */
@@ -1103,10 +1165,15 @@ static int create_cb_arglist(PyObject* fun,PyTupleObject* xa,const int maxnofarg
             Py_INCREF(tmp_fun);
         }
     }
-if (tmp_fun==NULL) {
-fprintf(stderr,\"Call-back argument must be function|instance|instance.__call__|f2py-function but got %s.\\n\",(fun==NULL?\"NULL\":Py_TYPE(fun)->tp_name));
-goto capi_fail;
-}
+
+    if (tmp_fun == NULL) {
+        fprintf(stderr,
+                \"Call-back argument must be function|instance|instance.__call__|f2py-function \"
+                \"but got %s.\\n\",
+                ((fun == NULL) ? \"NULL\" : Py_TYPE(fun)->tp_name));
+        goto capi_fail;
+    }
+
     if (PyObject_HasAttrString(tmp_fun,\"__code__\")) {
         if (PyObject_HasAttrString(tmp = PyObject_GetAttrString(tmp_fun,\"__code__\"),\"co_argcount\")) {
             PyObject *tmp_argcount = PyObject_GetAttrString(tmp,\"co_argcount\");
@@ -1114,7 +1181,7 @@ goto capi_fail;
             if (tmp_argcount == NULL) {
                 goto capi_fail;
             }
-            tot = PyInt_AsLong(tmp_argcount) - di;
+            tot = PyLong_AsSsize_t(tmp_argcount) - di;
             Py_DECREF(tmp_argcount);
         }
     }
@@ -1130,13 +1197,23 @@ goto capi_fail;
     /* Calculate the size of call-backs argument list */
     siz = MIN(maxnofargs+ext,tot);
     *nofargs = MAX(0,siz-ext);
+
 #ifdef DEBUGCFUNCS
-    fprintf(stderr,\"debug-capi:create_cb_arglist:maxnofargs(-nofoptargs),tot,opt,ext,siz,nofargs=%d(-%d),%d,%d,%d,%d,%d\\n\",maxnofargs,nofoptargs,tot,opt,ext,siz,*nofargs);
+    fprintf(stderr,
+            \"debug-capi:create_cb_arglist:maxnofargs(-nofoptargs),\"
+            \"tot,opt,ext,siz,nofargs = %d(-%d), %zd, %zd, %zd, %zd, %d\\n\",
+            maxnofargs, nofoptargs, tot, opt, ext, siz, *nofargs);
 #endif
-    if (siz<tot-opt) {
-        fprintf(stderr,\"create_cb_arglist: Failed to build argument list (siz) with enough arguments (tot-opt) required by user-supplied function (siz,tot,opt=%d,%d,%d).\\n\",siz,tot,opt);
+
+    if (siz < tot-opt) {
+        fprintf(stderr,
+                \"create_cb_arglist: Failed to build argument list \"
+                \"(siz) with enough arguments (tot-opt) required by \"
+                \"user-supplied function (siz,tot,opt=%zd, %zd, %zd).\\n\",
+                siz, tot, opt);
         goto capi_fail;
     }
+
     /* Initialize argument list */
     *args = (PyTupleObject *)PyTuple_New(siz);
     for (i=0;i<*nofargs;i++) {
@@ -1152,9 +1229,10 @@ goto capi_fail;
     CFUNCSMESS(\"create_cb_arglist-end\\n\");
     Py_DECREF(tmp_fun);
     return 1;
+
 capi_fail:
-    if ((PyErr_Occurred())==NULL)
-        PyErr_SetString(#modulename#_error,errmess);
+    if (PyErr_Occurred() == NULL)
+        PyErr_SetString(#modulename#_error, errmess);
     Py_XDECREF(tmp_fun);
     return 0;
 }
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index f1ec38c5c..c7ddbdb8d 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1450,7 +1450,7 @@ def angle(z, deg=False):
         The counterclockwise angle from the positive real axis on the complex
         plane in the range ``(-pi, pi]``, with dtype as numpy.float64.
 
-        ..versionchanged:: 1.16.0
+        .. versionchanged:: 1.16.0
             This function works on subclasses of ndarray like `ma.array`.
 
     See Also
@@ -4229,10 +4229,9 @@ def meshgrid(*xi, copy=True, sparse=False, indexing='xy'):
 
     See Also
     --------
-    index_tricks.mgrid : Construct a multi-dimensional "meshgrid"
-                     using indexing notation.
-    index_tricks.ogrid : Construct an open multi-dimensional "meshgrid"
-                     using indexing notation.
+    mgrid : Construct a multi-dimensional "meshgrid" using indexing notation.
+    ogrid : Construct an open multi-dimensional "meshgrid" using indexing
+            notation.
 
     Examples
     --------
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 003550432..409016adb 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -244,7 +244,7 @@ def nanmin(a, axis=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
         expected output, but the type will be cast if necessary. See
-        `ufuncs-output-type` for more details.
+        :ref:`ufuncs-output-type` for more details.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -359,7 +359,7 @@ def nanmax(a, axis=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
         expected output, but the type will be cast if necessary. See
-        `ufuncs-output-type` for more details.
+        :ref:`ufuncs-output-type` for more details.
 
         .. versionadded:: 1.8.0
     keepdims : bool, optional
@@ -584,7 +584,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``. If provided, it must have the same shape as the
         expected output, but the type will be cast if necessary.  See
-        `ufuncs-output-type` for more details. The casting of NaN to integer
+        :ref:`ufuncs-output-type` for more details. The casting of NaN to integer
         can yield unexpected results.
 
         .. versionadded:: 1.8.0
@@ -681,7 +681,7 @@ def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``. If provided, it must have the same shape as the
         expected output, but the type will be cast if necessary. See
-        `ufuncs-output-type` for more details. The casting of NaN to integer
+        :ref:`ufuncs-output-type` for more details. The casting of NaN to integer
         can yield unexpected results.
     keepdims : bool, optional
         If True, the axes which are reduced are left in the result as
@@ -749,7 +749,7 @@ def nancumsum(a, axis=None, dtype=None, out=None):
     out : ndarray, optional
         Alternative output array in which to place the result. It must
         have the same shape and buffer length as the expected output
-        but the type will be cast if necessary. See `ufuncs-output-type` for
+        but the type will be cast if necessary. See :ref:`ufuncs-output-type` for
         more details.
 
     Returns
@@ -888,7 +888,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue):
         Alternate output array in which to place the result.  The default
         is ``None``; if provided, it must have the same shape as the
         expected output, but the type will be cast if necessary. See
-        `ufuncs-output-type` for more details.
+        :ref:`ufuncs-output-type` for more details.
     keepdims : bool, optional
         If this is set to True, the axes which are reduced are left
         in the result as dimensions with size one. With this option,
@@ -1256,7 +1256,7 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
     Compute the qth quantile of the data along the specified axis,
     while ignoring nan values.
     Returns the qth quantile(s) of the array elements.
-    
+
     .. versionadded:: 1.15.0
 
     Parameters
@@ -1472,7 +1472,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     mean : Average
     var : Variance while not ignoring NaNs
     nanstd, nanmean
-    ufuncs-output-type
+    :ref:`ufuncs-output-type`
 
     Notes
     -----
@@ -1624,7 +1624,7 @@ def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
     --------
     var, mean, std
     nanvar, nanmean
-    ufuncs-output-type
+    :ref:`ufuncs-output-type`
 
     Notes
     -----
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index c6a19fda9..805e59bc1 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -86,7 +86,7 @@ class BagObj:
         try:
             return object.__getattribute__(self, '_obj')[key]
         except KeyError:
-            raise AttributeError(key)
+            raise AttributeError(key) from None
 
     def __dir__(self):
         """
@@ -446,9 +446,9 @@ def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
                                  "when allow_pickle=False")
             try:
                 return pickle.load(fid, **pickle_kwargs)
-            except Exception:
+            except Exception as e:
                 raise IOError(
-                    "Failed to interpret file %s as a pickle" % repr(file))
+                    "Failed to interpret file %s as a pickle" % repr(file)) from e
 
 
 def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
@@ -815,8 +815,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             fourth column the same way as ``usecols = (3,)`` would.
     unpack : bool, optional
         If True, the returned array is transposed, so that arguments may be
-        unpacked using ``x, y, z = loadtxt(...)``.  When used with a structured
-        data-type, arrays are returned for each field.  Default is False.
+        unpacked using ``x, y, z = loadtxt(...)``.  When used with a
+        structured data-type, arrays are returned for each field.
+        Default is False.
     ndmin : int, optional
         The returned array will have at least `ndmin` dimensions.
         Otherwise mono-dimensional axes will be squeezed.
@@ -1434,10 +1435,10 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
             for row in X:
                 try:
                     v = format % tuple(row) + newline
-                except TypeError:
+                except TypeError as e:
                     raise TypeError("Mismatch between array dtype ('%s') and "
                                     "format specifier ('%s')"
-                                    % (str(X.dtype), format))
+                                    % (str(X.dtype), format)) from e
                 fh.write(v)
 
         if len(footer) > 0:
@@ -1640,7 +1641,9 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         If 'lower', field names are converted to lower case.
     unpack : bool, optional
         If True, the returned array is transposed, so that arguments may be
-        unpacked using ``x, y, z = loadtxt(...)``
+        unpacked using ``x, y, z = genfromtxt(...)``.  When used with a
+        structured data-type, arrays are returned for each field.
+        Default is False.
     usemask : bool, optional
         If True, return a masked array.
         If False, return a regular array.
@@ -2269,9 +2272,18 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     if usemask:
         output = output.view(MaskedArray)
         output._mask = outputmask
+    output = np.squeeze(output)
     if unpack:
-        return output.squeeze().T
-    return output.squeeze()
+        if names is None:
+            return output.T
+        elif len(names) == 1:
+            # squeeze single-name dtypes too
+            return output[names[0]]
+        else:
+            # For structured arrays with multiple fields,
+            # return an array for each field.
+            return [output[field] for field in names]
+    return output
 
 
 _genfromtxt_with_like = array_function_dispatch(
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index cfc5dc9ca..fbfbca73d 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -513,7 +513,7 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
 
     Nested fields are supported.
 
-    ..versionchanged: 1.18.0
+    .. versionchanged:: 1.18.0
         `drop_fields` returns an array with 0 fields if all fields are dropped,
         rather than returning ``None`` as it did previously.
 
diff --git a/numpy/lib/scimath.py b/numpy/lib/scimath.py
index 555a3d5a8..2b0d38c37 100644
--- a/numpy/lib/scimath.py
+++ b/numpy/lib/scimath.py
@@ -14,6 +14,22 @@ module provide the mathematically valid answers in the complex plane::
 Similarly, `sqrt`, other base logarithms, `power` and trig functions are
 correctly handled.  See their respective docstrings for specific examples.
 
+Functions
+---------
+
+.. autosummary::
+   :toctree: generated/
+
+   sqrt
+   log
+   log2
+   logn
+   log10
+   power
+   arccos
+   arcsin
+   arctanh
+
 """
 import numpy.core.numeric as nx
 import numpy.core.numerictypes as nt
diff --git a/numpy/lib/tests/test_financial_expired.py b/numpy/lib/tests/test_financial_expired.py
index 66bb08026..70b0cd790 100644
--- a/numpy/lib/tests/test_financial_expired.py
+++ b/numpy/lib/tests/test_financial_expired.py
@@ -7,7 +7,7 @@ import numpy as np
                     reason="requires python 3.7 or higher")
 def test_financial_expired():
     match = 'NEP 32'
-    with pytest.warns(RuntimeWarning, match=match):
+    with pytest.warns(DeprecationWarning, match=match):
         func = np.fv
     with pytest.raises(RuntimeError, match=match):
         func(1, 2, 3)
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 38d698df4..aa4499764 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -1026,7 +1026,7 @@ class TestLoadTxt(LoadTxtBase):
         a = np.array([b'start ', b'  ', b''])
         assert_array_equal(x['comment'], a)
 
-    def test_structure_unpack(self):
+    def test_unpack_structured(self):
         txt = TextIO("M 21 72\nF 35 58")
         dt = {'names': ('a', 'b', 'c'), 'formats': ('|S1', '<i4', '<f4')}
         a, b, c = np.loadtxt(txt, dtype=dt, unpack=True)
@@ -2358,6 +2358,51 @@ M   33  21.99
         assert_equal(test['f1'], 17179869184)
         assert_equal(test['f2'], 1024)
 
+    def test_unpack_structured(self):
+        # Regression test for gh-4341
+        # Unpacking should work on structured arrays
+        txt = TextIO("M 21 72\nF 35 58")
+        dt = {'names': ('a', 'b', 'c'), 'formats': ('S1', 'i4', 'f4')}
+        a, b, c = np.genfromtxt(txt, dtype=dt, unpack=True)
+        assert_equal(a.dtype, np.dtype('S1'))
+        assert_equal(b.dtype, np.dtype('i4'))
+        assert_equal(c.dtype, np.dtype('f4'))
+        assert_array_equal(a, np.array([b'M', b'F']))
+        assert_array_equal(b, np.array([21, 35]))
+        assert_array_equal(c, np.array([72.,  58.]))
+
+    def test_unpack_auto_dtype(self):
+        # Regression test for gh-4341
+        # Unpacking should work when dtype=None
+        txt = TextIO("M 21 72.\nF 35 58.")
+        expected = (np.array(["M", "F"]), np.array([21, 35]), np.array([72., 58.]))
+        test = np.genfromtxt(txt, dtype=None, unpack=True, encoding="utf-8")
+        for arr, result in zip(expected, test):
+            assert_array_equal(arr, result)
+            assert_equal(arr.dtype, result.dtype)
+
+    def test_unpack_single_name(self):
+        # Regression test for gh-4341
+        # Unpacking should work when structured dtype has only one field
+        txt = TextIO("21\n35")
+        dt = {'names': ('a',), 'formats': ('i4',)}
+        expected = np.array([21, 35], dtype=np.int32)
+        test = np.genfromtxt(txt, dtype=dt, unpack=True)
+        assert_array_equal(expected, test)
+        assert_equal(expected.dtype, test.dtype)
+
+    def test_squeeze_scalar(self):
+        # Regression test for gh-4341
+        # Unpacking a scalar should give zero-dim output,
+        # even if dtype is structured
+        txt = TextIO("1")
+        dt = {'names': ('a',), 'formats': ('i4',)}
+        expected = np.array((1,), dtype=np.int32)
+        test = np.genfromtxt(txt, dtype=dt, unpack=True)
+        assert_array_equal(expected, test)
+        assert_equal((), test.shape)
+        assert_equal(expected.dtype, test.dtype)
+
 
 class TestPathUsage:
     # Test that pathlib.Path can be used
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index 92f93d671..b6d860dfa 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -2206,8 +2206,8 @@ def lstsq(a, b, rcond="warn"):
         Least-squares solution. If `b` is two-dimensional,
         the solutions are in the `K` columns of `x`.
     residuals : {(1,), (K,), (0,)} ndarray
-        Sums of residuals; squared Euclidean 2-norm for each column in
-        ``b - a*x``.
+        Sums of squared residuals: Squared Euclidean 2-norm for each column in
+        ``b - a @ x``.
         If the rank of `a` is < N or M <= N, this is an empty array.
         If `b` is 1-dimensional, this is a (1,) shape array.
         Otherwise the shape is (K,).
@@ -2558,7 +2558,7 @@ def norm(x, ord=None, axis=None, keepdims=False):
             # special case for speedup
             s = (x.conj() * x).real
             return sqrt(add.reduce(s, axis=axis, keepdims=keepdims))
-        # None of the str-type keywords for ord ('fro', 'nuc') 
+        # None of the str-type keywords for ord ('fro', 'nuc')
         # are valid for vectors
         elif isinstance(ord, str):
             raise ValueError(f"Invalid norm order '{ord}' for vectors")
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 8ede29da1..613bcb550 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -1641,7 +1641,7 @@ def flatnotmasked_contiguous(a):
     slice_list : list
         A sorted sequence of `slice` objects (start index, end index).
 
-        ..versionchanged:: 1.15.0
+        .. versionchanged:: 1.15.0
             Now returns an empty list instead of None for a fully masked array
 
     See Also
diff --git a/numpy/polynomial/chebyshev.py b/numpy/polynomial/chebyshev.py
index d99fd98f5..6745c9371 100644
--- a/numpy/polynomial/chebyshev.py
+++ b/numpy/polynomial/chebyshev.py
@@ -477,8 +477,6 @@ def chebline(off, scl):
     """
     Chebyshev series whose graph is a straight line.
 
-
-
     Parameters
     ----------
     off, scl : scalars
@@ -492,7 +490,11 @@ def chebline(off, scl):
 
     See Also
     --------
-    polyline
+    numpy.polynomial.polynomial.polyline
+    numpy.polynomial.legendre.legline
+    numpy.polynomial.laguerre.lagline
+    numpy.polynomial.hermite.hermline
+    numpy.polynomial.hermite_e.hermeline
 
     Examples
     --------
@@ -545,7 +547,11 @@ def chebfromroots(roots):
 
     See Also
     --------
-    polyfromroots, legfromroots, lagfromroots, hermfromroots, hermefromroots
+    numpy.polynomial.polynomial.polyfromroots
+    numpy.polynomial.legendre.legfromroots
+    numpy.polynomial.laguerre.lagfromroots
+    numpy.polynomial.hermite.hermfromroots
+    numpy.polynomial.hermite_e.hermefromroots
 
     Examples
     --------
@@ -764,7 +770,7 @@ def chebdiv(c1, c2):
 
     See Also
     --------
-    chebadd, chebsub, chemulx, chebmul, chebpow
+    chebadd, chebsub, chebmulx, chebmul, chebpow
 
     Notes
     -----
@@ -1601,7 +1607,7 @@ def chebfit(x, y, deg, rcond=None, full=False, w=None):
         sv -- singular values of the scaled Vandermonde matrix
         rcond -- value of `rcond`.
 
-        For more details, see `linalg.lstsq`.
+        For more details, see `numpy.linalg.lstsq`.
 
     Warns
     -----
@@ -1615,11 +1621,15 @@ def chebfit(x, y, deg, rcond=None, full=False, w=None):
 
     See Also
     --------
-    polyfit, legfit, lagfit, hermfit, hermefit
+    numpy.polynomial.polynomial.polyfit
+    numpy.polynomial.legendre.legfit
+    numpy.polynomial.laguerre.lagfit
+    numpy.polynomial.hermite.hermfit
+    numpy.polynomial.hermite_e.hermefit
     chebval : Evaluates a Chebyshev series.
     chebvander : Vandermonde matrix of Chebyshev series.
     chebweight : Chebyshev weight function.
-    linalg.lstsq : Computes a least-squares fit from the matrix.
+    numpy.linalg.lstsq : Computes a least-squares fit from the matrix.
     scipy.interpolate.UnivariateSpline : Computes spline fits.
 
     Notes
@@ -1729,7 +1739,11 @@ def chebroots(c):
 
     See Also
     --------
-    polyroots, legroots, lagroots, hermroots, hermeroots
+    numpy.polynomial.polynomial.polyroots
+    numpy.polynomial.legendre.legroots
+    numpy.polynomial.laguerre.lagroots
+    numpy.polynomial.hermite.hermroots
+    numpy.polynomial.hermite_e.hermeroots
 
     Notes
     -----
diff --git a/numpy/polynomial/hermite.py b/numpy/polynomial/hermite.py
index 280cad39e..c679c5298 100644
--- a/numpy/polynomial/hermite.py
+++ b/numpy/polynomial/hermite.py
@@ -233,7 +233,11 @@ def hermline(off, scl):
 
     See Also
     --------
-    polyline, chebline
+    numpy.polynomial.polynomial.polyline
+    numpy.polynomial.chebyshev.chebline
+    numpy.polynomial.legendre.legline
+    numpy.polynomial.laguerre.lagline
+    numpy.polynomial.hermite_e.hermeline
 
     Examples
     --------
@@ -286,7 +290,11 @@ def hermfromroots(roots):
 
     See Also
     --------
-    polyfromroots, legfromroots, lagfromroots, chebfromroots, hermefromroots
+    numpy.polynomial.polynomial.polyfromroots
+    numpy.polynomial.legendre.legfromroots
+    numpy.polynomial.laguerre.lagfromroots
+    numpy.polynomial.chebyshev.chebfromroots
+    numpy.polynomial.hermite_e.hermefromroots
 
     Examples
     --------
@@ -1322,7 +1330,7 @@ def hermfit(x, y, deg, rcond=None, full=False, w=None):
         sv -- singular values of the scaled Vandermonde matrix
         rcond -- value of `rcond`.
 
-        For more details, see `linalg.lstsq`.
+        For more details, see `numpy.linalg.lstsq`.
 
     Warns
     -----
@@ -1336,11 +1344,15 @@ def hermfit(x, y, deg, rcond=None, full=False, w=None):
 
     See Also
     --------
-    chebfit, legfit, lagfit, polyfit, hermefit
+    numpy.polynomial.chebyshev.chebfit
+    numpy.polynomial.legendre.legfit
+    numpy.polynomial.laguerre.lagfit
+    numpy.polynomial.polynomial.polyfit
+    numpy.polynomial.hermite_e.hermefit
     hermval : Evaluates a Hermite series.
     hermvander : Vandermonde matrix of Hermite series.
     hermweight : Hermite weight function
-    linalg.lstsq : Computes a least-squares fit from the matrix.
+    numpy.linalg.lstsq : Computes a least-squares fit from the matrix.
     scipy.interpolate.UnivariateSpline : Computes spline fits.
 
     Notes
@@ -1457,7 +1469,11 @@ def hermroots(c):
 
     See Also
     --------
-    polyroots, legroots, lagroots, chebroots, hermeroots
+    numpy.polynomial.polynomial.polyroots
+    numpy.polynomial.legendre.legroots
+    numpy.polynomial.laguerre.lagroots
+    numpy.polynomial.chebyshev.chebroots
+    numpy.polynomial.hermite_e.hermeroots
 
     Notes
     -----
diff --git a/numpy/polynomial/hermite_e.py b/numpy/polynomial/hermite_e.py
index 9b3b25105..1ce8ebe04 100644
--- a/numpy/polynomial/hermite_e.py
+++ b/numpy/polynomial/hermite_e.py
@@ -218,8 +218,6 @@ def hermeline(off, scl):
     """
     Hermite series whose graph is a straight line.
 
-
-
     Parameters
     ----------
     off, scl : scalars
@@ -233,7 +231,11 @@ def hermeline(off, scl):
 
     See Also
     --------
-    polyline, chebline
+    numpy.polynomial.polynomial.polyline
+    numpy.polynomial.chebyshev.chebline
+    numpy.polynomial.legendre.legline
+    numpy.polynomial.laguerre.lagline
+    numpy.polynomial.hermite.hermline
 
     Examples
     --------
@@ -287,7 +289,11 @@ def hermefromroots(roots):
 
     See Also
     --------
-    polyfromroots, legfromroots, lagfromroots, hermfromroots, chebfromroots
+    numpy.polynomial.polynomial.polyfromroots
+    numpy.polynomial.legendre.legfromroots
+    numpy.polynomial.laguerre.lagfromroots
+    numpy.polynomial.hermite.hermfromroots
+    numpy.polynomial.chebyshev.chebfromroots
 
     Examples
     --------
@@ -1315,7 +1321,7 @@ def hermefit(x, y, deg, rcond=None, full=False, w=None):
         sv -- singular values of the scaled Vandermonde matrix
         rcond -- value of `rcond`.
 
-        For more details, see `linalg.lstsq`.
+        For more details, see `numpy.linalg.lstsq`.
 
     Warns
     -----
@@ -1329,11 +1335,15 @@ def hermefit(x, y, deg, rcond=None, full=False, w=None):
 
     See Also
     --------
-    chebfit, legfit, polyfit, hermfit, polyfit
+    numpy.polynomial.chebyshev.chebfit
+    numpy.polynomial.legendre.legfit
+    numpy.polynomial.polynomial.polyfit
+    numpy.polynomial.hermite.hermfit
+    numpy.polynomial.laguerre.lagfit
     hermeval : Evaluates a Hermite series.
     hermevander : pseudo Vandermonde matrix of Hermite series.
     hermeweight : HermiteE weight function.
-    linalg.lstsq : Computes a least-squares fit from the matrix.
+    numpy.linalg.lstsq : Computes a least-squares fit from the matrix.
     scipy.interpolate.UnivariateSpline : Computes spline fits.
 
     Notes
@@ -1452,7 +1462,11 @@ def hermeroots(c):
 
     See Also
     --------
-    polyroots, legroots, lagroots, hermroots, chebroots
+    numpy.polynomial.polynomial.polyroots
+    numpy.polynomial.legendre.legroots
+    numpy.polynomial.laguerre.lagroots
+    numpy.polynomial.hermite.hermroots
+    numpy.polynomial.chebyshev.chebroots
 
     Notes
     -----
diff --git a/numpy/polynomial/laguerre.py b/numpy/polynomial/laguerre.py
index c1db13215..9cff0b71c 100644
--- a/numpy/polynomial/laguerre.py
+++ b/numpy/polynomial/laguerre.py
@@ -214,8 +214,6 @@ def lagline(off, scl):
     """
     Laguerre series whose graph is a straight line.
 
-
-
     Parameters
     ----------
     off, scl : scalars
@@ -229,7 +227,11 @@ def lagline(off, scl):
 
     See Also
     --------
-    polyline, chebline
+    numpy.polynomial.polynomial.polyline
+    numpy.polynomial.chebyshev.chebline
+    numpy.polynomial.legendre.legline
+    numpy.polynomial.hermite.hermline
+    numpy.polynomial.hermite_e.hermeline
 
     Examples
     --------
@@ -282,7 +284,11 @@ def lagfromroots(roots):
 
     See Also
     --------
-    polyfromroots, legfromroots, chebfromroots, hermfromroots, hermefromroots
+    numpy.polynomial.polynomial.polyfromroots
+    numpy.polynomial.legendre.legfromroots
+    numpy.polynomial.chebyshev.chebfromroots
+    numpy.polynomial.hermite.hermfromroots
+    numpy.polynomial.hermite_e.hermefromroots
 
     Examples
     --------
@@ -1321,7 +1327,7 @@ def lagfit(x, y, deg, rcond=None, full=False, w=None):
         sv -- singular values of the scaled Vandermonde matrix
         rcond -- value of `rcond`.
 
-        For more details, see `linalg.lstsq`.
+        For more details, see `numpy.linalg.lstsq`.
 
     Warns
     -----
@@ -1335,11 +1341,15 @@ def lagfit(x, y, deg, rcond=None, full=False, w=None):
 
     See Also
     --------
-    chebfit, legfit, polyfit, hermfit, hermefit
+    numpy.polynomial.polynomial.polyfit
+    numpy.polynomial.legendre.legfit
+    numpy.polynomial.chebyshev.chebfit
+    numpy.polynomial.hermite.hermfit
+    numpy.polynomial.hermite_e.hermefit
     lagval : Evaluates a Laguerre series.
     lagvander : pseudo Vandermonde matrix of Laguerre series.
     lagweight : Laguerre weight function.
-    linalg.lstsq : Computes a least-squares fit from the matrix.
+    numpy.linalg.lstsq : Computes a least-squares fit from the matrix.
     scipy.interpolate.UnivariateSpline : Computes spline fits.
 
     Notes
@@ -1455,7 +1465,11 @@ def lagroots(c):
 
     See Also
     --------
-    polyroots, legroots, chebroots, hermroots, hermeroots
+    numpy.polynomial.polynomial.polyroots
+    numpy.polynomial.legendre.legroots
+    numpy.polynomial.chebyshev.chebroots
+    numpy.polynomial.hermite.hermroots
+    numpy.polynomial.hermite_e.hermeroots
 
     Notes
     -----
diff --git a/numpy/polynomial/legendre.py b/numpy/polynomial/legendre.py
index 7b5b665f2..427f9f82f 100644
--- a/numpy/polynomial/legendre.py
+++ b/numpy/polynomial/legendre.py
@@ -243,7 +243,11 @@ def legline(off, scl):
 
     See Also
     --------
-    polyline, chebline
+    numpy.polynomial.polynomial.polyline
+    numpy.polynomial.chebyshev.chebline
+    numpy.polynomial.laguerre.lagline
+    numpy.polynomial.hermite.hermline
+    numpy.polynomial.hermite_e.hermeline
 
     Examples
     --------
@@ -296,7 +300,11 @@ def legfromroots(roots):
 
     See Also
     --------
-    polyfromroots, chebfromroots, lagfromroots, hermfromroots, hermefromroots
+    numpy.polynomial.polynomial.polyfromroots
+    numpy.polynomial.chebyshev.chebfromroots
+    numpy.polynomial.laguerre.lagfromroots
+    numpy.polynomial.hermite.hermfromroots
+    numpy.polynomial.hermite_e.hermefromroots
 
     Examples
     --------
@@ -1343,7 +1351,7 @@ def legfit(x, y, deg, rcond=None, full=False, w=None):
         sv -- singular values of the scaled Vandermonde matrix
         rcond -- value of `rcond`.
 
-        For more details, see `linalg.lstsq`.
+        For more details, see `numpy.linalg.lstsq`.
 
     Warns
     -----
@@ -1357,11 +1365,15 @@ def legfit(x, y, deg, rcond=None, full=False, w=None):
 
     See Also
     --------
-    chebfit, polyfit, lagfit, hermfit, hermefit
+    numpy.polynomial.polynomial.polyfit
+    numpy.polynomial.chebyshev.chebfit
+    numpy.polynomial.laguerre.lagfit
+    numpy.polynomial.hermite.hermfit
+    numpy.polynomial.hermite_e.hermefit
     legval : Evaluates a Legendre series.
     legvander : Vandermonde matrix of Legendre series.
     legweight : Legendre weight function (= 1).
-    linalg.lstsq : Computes a least-squares fit from the matrix.
+    numpy.linalg.lstsq : Computes a least-squares fit from the matrix.
     scipy.interpolate.UnivariateSpline : Computes spline fits.
 
     Notes
@@ -1470,7 +1482,11 @@ def legroots(c):
 
     See Also
     --------
-    polyroots, chebroots, lagroots, hermroots, hermeroots
+    numpy.polynomial.polynomial.polyroots
+    numpy.polynomial.chebyshev.chebroots
+    numpy.polynomial.laguerre.lagroots
+    numpy.polynomial.hermite.hermroots
+    numpy.polynomial.hermite_e.hermeroots
 
     Notes
     -----
diff --git a/numpy/polynomial/polynomial.py b/numpy/polynomial/polynomial.py
index 83693441f..1baa7d870 100644
--- a/numpy/polynomial/polynomial.py
+++ b/numpy/polynomial/polynomial.py
@@ -127,7 +127,11 @@ def polyline(off, scl):
 
     See Also
     --------
-    chebline
+    numpy.polynomial.chebyshev.chebline
+    numpy.polynomial.legendre.legline
+    numpy.polynomial.laguerre.lagline
+    numpy.polynomial.hermite.hermline
+    numpy.polynomial.hermite_e.hermeline
 
     Examples
     --------
@@ -179,8 +183,11 @@ def polyfromroots(roots):
 
     See Also
     --------
-    chebfromroots, legfromroots, lagfromroots, hermfromroots
-    hermefromroots
+    numpy.polynomial.chebyshev.chebfromroots
+    numpy.polynomial.legendre.legfromroots
+    numpy.polynomial.laguerre.lagfromroots
+    numpy.polynomial.hermite.hermfromroots
+    numpy.polynomial.hermite_e.hermefromroots
 
     Notes
     -----
@@ -1267,7 +1274,7 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None):
         sv -- singular values of the scaled Vandermonde matrix
         rcond -- value of `rcond`.
 
-        For more details, see `linalg.lstsq`.
+        For more details, see `numpy.linalg.lstsq`.
 
     Raises
     ------
@@ -1281,10 +1288,14 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None):
 
     See Also
     --------
-    chebfit, legfit, lagfit, hermfit, hermefit
+    numpy.polynomial.chebyshev.chebfit
+    numpy.polynomial.legendre.legfit
+    numpy.polynomial.laguerre.lagfit
+    numpy.polynomial.hermite.hermfit
+    numpy.polynomial.hermite_e.hermefit
     polyval : Evaluates a polynomial.
     polyvander : Vandermonde matrix for powers.
-    linalg.lstsq : Computes a least-squares fit from the matrix.
+    numpy.linalg.lstsq : Computes a least-squares fit from the matrix.
     scipy.interpolate.UnivariateSpline : Computes spline fits.
 
     Notes
@@ -1411,7 +1422,11 @@ def polyroots(c):
 
     See Also
     --------
-    chebroots
+    numpy.polynomial.chebyshev.chebroots
+    numpy.polynomial.legendre.legroots
+    numpy.polynomial.laguerre.lagroots
+    numpy.polynomial.hermite.hermroots
+    numpy.polynomial.hermite_e.hermeroots
 
     Notes
     -----
diff --git a/pyproject.toml b/pyproject.toml
index 3566cc683..cfdbfa6c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,8 +2,8 @@
 # Minimum requirements for the build system to execute.
 requires = [
     "setuptools<49.2.0",
-    "wheel",
-    "Cython>=0.29.21",  # Note: keep in sync with tools/cythonize.py
+    "wheel<=0.35.1",
+    "Cython>=0.29.21,<3.0",  # Note: keep in sync with tools/cythonize.py
 ]
 
 
diff --git a/test_requirements.txt b/test_requirements.txt
index 7ef91125c..036e9861f 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -1,8 +1,8 @@
 cython==0.29.21
 wheel
 setuptools<49.2.0
-hypothesis==5.33.0
-pytest==6.0.1
+hypothesis==5.35.3
+pytest==6.0.2
 pytz==2020.1
 pytest-cov==2.10.1
 pickle5; python_version == '3.7'
diff --git a/tools/swig/numpy.i b/tools/swig/numpy.i
index 8416e82f3..6b69ce96e 100644
--- a/tools/swig/numpy.i
+++ b/tools/swig/numpy.i
@@ -114,7 +114,7 @@
     if (py_obj == NULL          ) return "C NULL value";
     if (py_obj == Py_None       ) return "Python None" ;
     if (PyCallable_Check(py_obj)) return "callable"    ;
-    if (PyString_Check(  py_obj)) return "string"      ;
+    if (PyBytes_Check(   py_obj)) return "string"      ;
     if (PyInt_Check(     py_obj)) return "int"         ;
     if (PyFloat_Check(   py_obj)) return "float"       ;
     if (PyDict_Check(    py_obj)) return "dict"        ;