Diffstat (limited to 'numpy')
72 files changed, 1682 insertions, 444 deletions
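The headline change is a runtime switch for NEP 50 promotion behavior. A minimal sketch of how the pieces added below fit together; `_get_promotion_state`, `_set_promotion_state`, and `_no_nep50_warning` are private APIs introduced by this commit, and the exact dtypes shown assume NEP 50 weak-promotion semantics:

    import numpy as np

    # Seeded from the NPY_PROMOTION_STATE environment variable at import
    # time (see the __init__.py hunk below); the default is "legacy".
    np._get_promotion_state()         # 'legacy'
    np.result_type(np.int8, 200)      # int16: value-based, 200 does not fit int8

    np._set_promotion_state("weak")
    np.result_type(np.int8, 200)      # int8: the Python int is a "weak" scalar

    np._set_promotion_state("weak_and_warn")
    with np._no_nep50_warning():      # context-local warning suppression
        np.result_type(np.int8, 200)  # int8; the "result dtype changed" warning is silenced

    np._set_promotion_state("legacy") # restore the default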
diff --git a/numpy/__init__.py b/numpy/__init__.py index 83487dc97..66b8e3eca 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -337,7 +337,7 @@ else: """ try: x = ones(2, dtype=float32) - if not abs(x.dot(x) - 2.0) < 1e-5: + if not abs(x.dot(x) - float32(2.0)) < 1e-5: raise AssertionError() except AssertionError: msg = ("The current Numpy installation ({!r}) fails to " @@ -413,6 +413,8 @@ else: # it is tidier organized. core.multiarray._multiarray_umath._reload_guard() + core._set_promotion_state(os.environ.get("NPY_PROMOTION_STATE", "legacy")) + # Tell PyInstaller where to find hook-numpy.py def _pyinstaller_hooks_dir(): from pathlib import Path diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index d6faa9ca3..001fa4e88 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -9,6 +9,7 @@ import enum from abc import abstractmethod from types import TracebackType, MappingProxyType from contextlib import ContextDecorator +from contextlib import contextmanager if sys.version_info >= (3, 9): from types import GenericAlias @@ -180,6 +181,7 @@ from collections.abc import ( from typing import ( Literal as L, Any, + Generator, Generic, IO, NoReturn, @@ -3350,6 +3352,11 @@ class errstate(Generic[_CallType], ContextDecorator): /, ) -> None: ... +@contextmanager +def _no_nep50_warning() -> Generator[None, None, None]: ... +def _get_promotion_state() -> str: ... +def _set_promotion_state(state: str, /) -> None: ... + class ndenumerate(Generic[_ScalarType]): iter: flatiter[NDArray[_ScalarType]] @overload diff --git a/numpy/_typing/_nested_sequence.py b/numpy/_typing/_nested_sequence.py index 7c12c4a87..360c0f1b2 100644 --- a/numpy/_typing/_nested_sequence.py +++ b/numpy/_typing/_nested_sequence.py @@ -36,9 +36,9 @@ class _NestedSequence(Protocol[_T_co]): >>> from typing import TYPE_CHECKING >>> import numpy as np - >>> from numpy._typing import _NestedSequnce + >>> from numpy._typing import _NestedSequence - >>> def get_dtype(seq: _NestedSequnce[float]) -> np.dtype[np.float64]: + >>> def get_dtype(seq: _NestedSequence[float]) -> np.dtype[np.float64]: ... return np.asarray(seq).dtype >>> a = get_dtype([1.0]) diff --git a/numpy/conftest.py b/numpy/conftest.py index fd5fdd77d..8aa6587ee 100644 --- a/numpy/conftest.py +++ b/numpy/conftest.py @@ -117,3 +117,20 @@ def add_np(doctest_namespace): @pytest.fixture(autouse=True) def env_setup(monkeypatch): monkeypatch.setenv('PYTHONHASHSEED', '0') + + +@pytest.fixture(params=[True, False]) +def weak_promotion(request): + """ + Fixture to ensure "legacy" promotion state or change it to use the new + weak promotion (plus warning). `weak_promotion` should be used as a + parameter in the function.
+ """ + state = numpy._get_promotion_state() + if request.param: + numpy._set_promotion_state("weak_and_warn") + else: + numpy._set_promotion_state("legacy") + + yield request.param + numpy._set_promotion_state(state) diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 3e8df6d46..f71136eef 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -1084,13 +1084,32 @@ add_newdoc('numpy.core.multiarray', 'ascontiguousarray', Examples -------- - >>> x = np.arange(6).reshape(2,3) - >>> np.ascontiguousarray(x, dtype=np.float32) - array([[0., 1., 2.], - [3., 4., 5.]], dtype=float32) + Starting with a Fortran-contiguous array: + + >>> x = np.ones((2, 3), order='F') + >>> x.flags['F_CONTIGUOUS'] + True + + Calling ``ascontiguousarray`` makes a C-contiguous copy: + + >>> y = np.ascontiguousarray(x) + >>> y.flags['C_CONTIGUOUS'] + True + >>> np.may_share_memory(x, y) + False + + Now, starting with a C-contiguous array: + + >>> x = np.ones((2, 3), order='C') >>> x.flags['C_CONTIGUOUS'] True + Then, calling ``ascontiguousarray`` returns the same object: + + >>> y = np.ascontiguousarray(x) + >>> x is y + True + Note: This function returns an array with at least one-dimension (1-d) so it will not preserve 0-d arrays. @@ -1130,12 +1149,31 @@ add_newdoc('numpy.core.multiarray', 'asfortranarray', Examples -------- - >>> x = np.arange(6).reshape(2,3) + Starting with a C-contiguous array: + + >>> x = np.ones((2, 3), order='C') + >>> x.flags['C_CONTIGUOUS'] + True + + Calling ``asfortranarray`` makes a Fortran-contiguous copy: + >>> y = np.asfortranarray(x) - >>> x.flags['F_CONTIGUOUS'] - False >>> y.flags['F_CONTIGUOUS'] True + >>> np.may_share_memory(x, y) + False + + Now, starting with a Fortran-contiguous array: + + >>> x = np.ones((2, 3), order='F') + >>> x.flags['F_CONTIGUOUS'] + True + + Then, calling ``asfortranarray`` returns the same object: + + >>> y = np.asfortranarray(x) + >>> x is y + True Note: This function returns an array with at least one-dimension (1-d) so it will not preserve 0-d arrays. diff --git a/numpy/core/_asarray.py b/numpy/core/_asarray.py index 89d422e99..cbaab8c3f 100644 --- a/numpy/core/_asarray.py +++ b/numpy/core/_asarray.py @@ -14,6 +14,15 @@ from .multiarray import array, asanyarray __all__ = ["require"] +POSSIBLE_FLAGS = { + 'C': 'C', 'C_CONTIGUOUS': 'C', 'CONTIGUOUS': 'C', + 'F': 'F', 'F_CONTIGUOUS': 'F', 'FORTRAN': 'F', + 'A': 'A', 'ALIGNED': 'A', + 'W': 'W', 'WRITEABLE': 'W', + 'O': 'O', 'OWNDATA': 'O', + 'E': 'E', 'ENSUREARRAY': 'E' +} + def _require_dispatcher(a, dtype=None, requirements=None, *, like=None): return (like,) @@ -36,7 +45,7 @@ def require(a, dtype=None, requirements=None, *, like=None): The required data-type. If None preserve the current dtype. If your application requires the data to be in native byteorder, include a byteorder specification as a part of the dtype specification. 
- requirements : str or list of str + requirements : str or sequence of str The requirements list can be any of the following * 'F_CONTIGUOUS' ('F') - ensure a Fortran-contiguous array @@ -97,16 +106,10 @@ def require(a, dtype=None, requirements=None, *, like=None): like=like, ) - possible_flags = {'C': 'C', 'C_CONTIGUOUS': 'C', 'CONTIGUOUS': 'C', - 'F': 'F', 'F_CONTIGUOUS': 'F', 'FORTRAN': 'F', - 'A': 'A', 'ALIGNED': 'A', - 'W': 'W', 'WRITEABLE': 'W', - 'O': 'O', 'OWNDATA': 'O', - 'E': 'E', 'ENSUREARRAY': 'E'} if not requirements: return asanyarray(a, dtype=dtype) - else: - requirements = {possible_flags[x.upper()] for x in requirements} + + requirements = {POSSIBLE_FLAGS[x.upper()] for x in requirements} if 'E' in requirements: requirements.remove('E') @@ -128,8 +131,7 @@ def require(a, dtype=None, requirements=None, *, like=None): for prop in requirements: if not arr.flags[prop]: - arr = arr.copy(order) - break + return arr.copy(order) return arr diff --git a/numpy/core/_machar.py b/numpy/core/_machar.py index ace19a429..3cc7db278 100644 --- a/numpy/core/_machar.py +++ b/numpy/core/_machar.py @@ -326,7 +326,9 @@ class MachAr: self.tiny = self.xmin self.huge = self.xmax self.smallest_normal = self.xmin + self._str_smallest_normal = float_to_str(self.xmin) self.smallest_subnormal = float_to_float(smallest_subnormal) + self._str_smallest_subnormal = float_to_str(smallest_subnormal) import math self.precision = int(-math.log10(float_to_float(self.eps))) diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index eda00147d..040f02a9d 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -11,6 +11,7 @@ from numpy.core import umath as um from numpy.core.multiarray import asanyarray from numpy.core import numerictypes as nt from numpy.core import _exceptions +from numpy.core._ufunc_config import _no_nep50_warning from numpy._globals import _NoValue from numpy.compat import pickle, os_fspath @@ -179,8 +180,9 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True): ret = umr_sum(arr, axis, dtype, out, keepdims, where=where) if isinstance(ret, mu.ndarray): - ret = um.true_divide( - ret, rcount, out=ret, casting='unsafe', subok=False) + with _no_nep50_warning(): + ret = um.true_divide( + ret, rcount, out=ret, casting='unsafe', subok=False) if is_float16_result and out is None: ret = arr.dtype.type(ret) elif hasattr(ret, 'dtype'): @@ -220,8 +222,9 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, # matching rcount to arrmean when where is specified as array div = rcount.reshape(arrmean.shape) if isinstance(arrmean, mu.ndarray): - arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe', - subok=False) + with _no_nep50_warning(): + arrmean = um.true_divide(arrmean, div, out=arrmean, + casting='unsafe', subok=False) elif hasattr(arrmean, "dtype"): arrmean = arrmean.dtype.type(arrmean / rcount) else: @@ -251,8 +254,9 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, # divide by degrees of freedom if isinstance(ret, mu.ndarray): - ret = um.true_divide( - ret, rcount, out=ret, casting='unsafe', subok=False) + with _no_nep50_warning(): + ret = um.true_divide( + ret, rcount, out=ret, casting='unsafe', subok=False) elif hasattr(ret, 'dtype'): ret = ret.dtype.type(ret / rcount) else: diff --git a/numpy/core/_ufunc_config.py b/numpy/core/_ufunc_config.py index a731f6bf7..5aac7ab09 100644 --- a/numpy/core/_ufunc_config.py +++ b/numpy/core/_ufunc_config.py @@ -5,6 +5,7 @@ This provides helpers which wrap 
`umath.geterrobj` and `umath.seterrobj` """ import collections.abc import contextlib +import contextvars from .overrides import set_module from .umath import ( @@ -16,7 +17,7 @@ from . import umath __all__ = [ "seterr", "geterr", "setbufsize", "getbufsize", "seterrcall", "geterrcall", - "errstate", + "errstate", '_no_nep50_warning' ] _errdict = {"ignore": ERR_IGNORE, @@ -96,7 +97,7 @@ def seterr(all=None, divide=None, over=None, under=None, invalid=None): >>> np.int16(32000) * np.int16(3) Traceback (most recent call last): File "<stdin>", line 1, in <module> - FloatingPointError: overflow encountered in short_scalars + FloatingPointError: overflow encountered in scalar multiply >>> old_settings = np.seterr(all='print') >>> np.geterr() @@ -444,3 +445,22 @@ def _setdef(): # set the default values _setdef() + + +NO_NEP50_WARNING = contextvars.ContextVar("_no_nep50_warning", default=False) + +@set_module('numpy') +@contextlib.contextmanager +def _no_nep50_warning(): + """ + Context manager to disable NEP 50 warnings. This context manager is + only relevant if the NEP 50 warnings are enabled globally (which is not + thread/context safe). + + This warning context manager itself is fully safe, however. + """ + token = NO_NEP50_WARNING.set(True) + try: + yield + finally: + NO_NEP50_WARNING.reset(token) diff --git a/numpy/core/_ufunc_config.pyi b/numpy/core/_ufunc_config.pyi index b7c2ebefc..f56504507 100644 --- a/numpy/core/_ufunc_config.pyi +++ b/numpy/core/_ufunc_config.pyi @@ -34,4 +34,4 @@ def seterrcall( ) -> None | _ErrFunc | _SupportsWrite[str]: ... def geterrcall() -> None | _ErrFunc | _SupportsWrite[str]: ... -# See `numpy/__init__.pyi` for the `errstate` class +# See `numpy/__init__.pyi` for the `errstate` class and `_no_nep50_warning` diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index 3521e778e..fdda44e56 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -445,6 +445,22 @@ def center(a, width, fillchar=' '): See Also -------- str.center + + Notes + ----- + This function is intended to work with arrays of strings. The + fill character is not applied to numeric types.
+ + Examples + -------- + >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c + array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4') + >>> np.char.center(c, width=9) + array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='<U9') + >>> np.char.center(c, width=9, fillchar='*') + array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9') + >>> np.char.center(c, width=1) + array(['a', '1', 'b', '2'], dtype='<U1') """ a_arr = numpy.asarray(a) diff --git a/numpy/core/getlimits.py b/numpy/core/getlimits.py index ab4a4d2be..4149a5303 100644 --- a/numpy/core/getlimits.py +++ b/numpy/core/getlimits.py @@ -343,8 +343,9 @@ def _get_machar(ftype): return ma_like # Fall back to parameter discovery warnings.warn( - 'Signature {} for {} does not match any known type: ' - 'falling back to type probe function'.format(key, ftype), + f'Signature {key} for {ftype} does not match any known type: ' + 'falling back to type probe function.\n' + 'This warning indicates broken support for the dtype!', UserWarning, stacklevel=2) return _discovered_machar(ftype) diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 97e0f4e2a..1db3b974f 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -872,17 +872,6 @@ typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *); */ #define NPY_ARRAY_ENSUREARRAY 0x0040 -#if defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD - /* - * Dual use of the ENSUREARRAY flag, to indicate that this was converted - * from a python float, int, or complex. - * An array using this flag must be a temporary array that can never - * leave the C internals of NumPy. Even if it does, ENSUREARRAY is - * absolutely safe to abuse, since it already is a base class array :). - */ - #define _NPY_ARRAY_WAS_PYSCALAR 0x0040 -#endif /* NPY_INTERNAL_BUILD */ - /* * Make sure that the strides are in units of the element size Needed * for some operations with record-arrays.
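The dual-use `_NPY_ARRAY_WAS_PYSCALAR` flag removed here is superseded by the dedicated `NPY_ARRAY_WAS_PYTHON_INT`/`_FLOAT`/`_COMPLEX` flags added to arrayobject.h further down. A small sketch of the user-visible distinction those flags encode; the dtypes shown assume NEP 50 weak-promotion semantics:

    import numpy as np

    np._set_promotion_state("weak")
    np.result_type(np.float32, 2.0)              # float32: a Python float is "weak"
    np.result_type(np.float32, np.float64(2.0))  # float64: a NumPy scalar keeps its dtype
    np._set_promotion_state("legacy")            # legacy value-based rules would give
                                                 # float32 for both calls above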
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index bead0dc14..954c3d0e9 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -1,10 +1,6 @@ #ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_MATH_H_ #define NUMPY_CORE_INCLUDE_NUMPY_NPY_MATH_H_ -#ifdef __cplusplus -extern "C" { -#endif - #include <numpy/npy_common.h> #include <math.h> @@ -21,6 +17,10 @@ extern "C" { #endif +#ifdef __cplusplus +extern "C" { +#endif + /* * NAN and INFINITY like macros (same behavior as glibc for NAN, same as C99 * for INFINITY) diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 8c14583e6..65b7cb46d 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -17,6 +17,7 @@ from ._multiarray_umath import ( _fastCopyAndTranspose, _flagdict, from_dlpack, _insert, _reconstruct, _vec_string, _ARRAY_API, _monotonicity, _get_ndarray_c_version, _get_madvise_hugepage, _set_madvise_hugepage, + _get_promotion_state, _set_promotion_state, ) __all__ = [ @@ -40,7 +41,8 @@ __all__ = [ 'ravel_multi_index', 'result_type', 'scalar', 'set_datetimeparse_function', 'set_legacy_print_mode', 'set_numeric_ops', 'set_string_function', 'set_typeDict', 'shares_memory', 'tracemalloc_domain', 'typeinfo', - 'unpackbits', 'unravel_index', 'vdot', 'where', 'zeros'] + 'unpackbits', 'unravel_index', 'vdot', 'where', 'zeros', + '_get_promotion_state', '_set_promotion_state'] # For backward compatibility, make sure pickle imports these functions from here _reconstruct.__module__ = 'numpy.core.multiarray' @@ -68,6 +70,8 @@ promote_types.__module__ = 'numpy' set_numeric_ops.__module__ = 'numpy' seterrobj.__module__ = 'numpy' zeros.__module__ = 'numpy' +_get_promotion_state.__module__ = 'numpy' +_set_promotion_state.__module__ = 'numpy' # We can't verify dispatcher signatures because NumPy's C functions don't diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index bb3cbf054..cfcd237aa 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -17,7 +17,7 @@ from .multiarray import ( fromstring, inner, lexsort, matmul, may_share_memory, min_scalar_type, ndarray, nditer, nested_iters, promote_types, putmask, result_type, set_numeric_ops, shares_memory, vdot, where, - zeros, normalize_axis_index) + zeros, normalize_axis_index, _get_promotion_state, _set_promotion_state) from . import overrides from . import umath @@ -27,7 +27,7 @@ from .umath import (multiply, invert, sin, PINF, NAN) from . 
import numerictypes from .numerictypes import longlong, intc, int_, float_, complex_, bool_ from ._exceptions import TooHardError, AxisError -from ._ufunc_config import errstate +from ._ufunc_config import errstate, _no_nep50_warning bitwise_not = invert ufunc = type(sin) @@ -54,7 +54,8 @@ __all__ = [ 'False_', 'True_', 'bitwise_not', 'CLIP', 'RAISE', 'WRAP', 'MAXDIMS', 'BUFSIZE', 'ALLOW_THREADS', 'ComplexWarning', 'full', 'full_like', 'matmul', 'shares_memory', 'may_share_memory', 'MAY_SHARE_BOUNDS', - 'MAY_SHARE_EXACT', 'TooHardError', 'AxisError'] + 'MAY_SHARE_EXACT', 'TooHardError', 'AxisError', + '_get_promotion_state', '_set_promotion_state'] @set_module('numpy') @@ -1621,6 +1622,10 @@ def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None): dtype = promote_types(a.dtype, b.dtype) cp = empty(shape, dtype) + # recast arrays as dtype + a = a.astype(dtype) + b = b.astype(dtype) + # create local aliases for readability a0 = a[..., 0] a1 = a[..., 1] @@ -2352,7 +2357,7 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): array([False, True]) """ def within_tol(x, y, atol, rtol): - with errstate(invalid='ignore'): + with errstate(invalid='ignore'), _no_nep50_warning(): return less_equal(abs(x-y), atol + rtol * abs(y)) x = asanyarray(a) diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 543b6ae39..cdf6117e6 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -9,6 +9,7 @@ import glob from os.path import join from numpy.distutils import log +from numpy.distutils.msvccompiler import lib_opts_if_msvc from distutils.dep_util import newer from sysconfig import get_config_var from numpy.compat import npy_load_module @@ -80,7 +81,9 @@ def can_link_svml(): if NPY_DISABLE_SVML: return False platform = sysconfig.get_platform() - return "x86_64" in platform and "linux" in platform + return ("x86_64" in platform + and "linux" in platform + and sys.maxsize > 2**31) def check_svml_submodule(svmlpath): if not os.path.exists(svmlpath + "/README.md"): @@ -771,29 +774,12 @@ def configuration(parent_package='',top_path=None): join('src', 'npymath', 'halffloat.c') ] - def opts_if_msvc(build_cmd): - """ Add flags if we are using MSVC compiler - - We can't see `build_cmd` in our scope, because we have not initialized - the distutils build command, so use this deferred calculation to run - when we are building the library. - """ - if build_cmd.compiler.compiler_type != 'msvc': - return [] - # Explicitly disable whole-program optimization. - flags = ['/GL-'] - # Disable voltbl section for vc142 to allow link using mingw-w64; see: - # https://github.com/matthew-brett/dll_investigation/issues/1#issuecomment-1100468171 - if build_cmd.compiler_opt.cc_test_flags(['-d2VolatileMetadata-']): - flags.append('-d2VolatileMetadata-') - return flags - config.add_installed_library('npymath', sources=npymath_sources + [get_mathlib_info], install_dir='lib', build_info={ 'include_dirs' : [], # empty list required for creating npy_math_internal.h - 'extra_compiler_args': [opts_if_msvc], + 'extra_compiler_args': [lib_opts_if_msvc], }) config.add_npy_pkg_config("npymath.ini.in", "lib/npy-pkg-config", subst_dict) diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h index 4607d6f27..8f4680c8f 100644 --- a/numpy/core/src/common/simd/neon/math.h +++ b/numpy/core/src/common/simd/neon/math.h @@ -161,7 +161,7 @@ NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a) #else // ARMv7 NEON only supports fp to int truncate conversion. 
// a magic trick of adding 1.5 * 2**23 is used for rounding - // to nearest even and then substract this magic number to get + // to nearest even and then subtract this magic number to get // the integer. const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f)); const npyv_f32 magic = vdupq_n_f32(12582912.0f); // 1.5 * 2**23 diff --git a/numpy/core/src/multiarray/abstractdtypes.c b/numpy/core/src/multiarray/abstractdtypes.c index b0345c46b..3e89d045e 100644 --- a/numpy/core/src/multiarray/abstractdtypes.c +++ b/numpy/core/src/multiarray/abstractdtypes.c @@ -164,7 +164,39 @@ int_common_dtype(PyArray_DTypeMeta *NPY_UNUSED(cls), PyArray_DTypeMeta *other) } else if (NPY_DT_is_legacy(other)) { /* This is a back-compat fallback to usually do the right thing... */ - return PyArray_DTypeFromTypeNum(NPY_UINT8); + PyArray_DTypeMeta *uint8_dt = PyArray_DTypeFromTypeNum(NPY_UINT8); + PyArray_DTypeMeta *res = NPY_DT_CALL_common_dtype(other, uint8_dt); + Py_DECREF(uint8_dt); + if (res == NULL) { + PyErr_Clear(); + } + else if (res == (PyArray_DTypeMeta *)Py_NotImplemented) { + Py_DECREF(res); + } + else { + return res; + } + /* Try again with `int8`, an error may have been set, though */ + PyArray_DTypeMeta *int8_dt = PyArray_DTypeFromTypeNum(NPY_INT8); + res = NPY_DT_CALL_common_dtype(other, int8_dt); + Py_DECREF(int8_dt); + if (res == NULL) { + PyErr_Clear(); + } + else if (res == (PyArray_DTypeMeta *)Py_NotImplemented) { + Py_DECREF(res); + } + else { + return res; + } + /* And finally, we will try the default integer, just for sports... */ + PyArray_DTypeMeta *default_int = PyArray_DTypeFromTypeNum(NPY_LONG); + res = NPY_DT_CALL_common_dtype(other, default_int); + Py_DECREF(default_int); + if (res == NULL) { + PyErr_Clear(); + } + return res; } Py_INCREF(Py_NotImplemented); return (PyArray_DTypeMeta *)Py_NotImplemented; @@ -191,7 +223,23 @@ float_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) } else if (NPY_DT_is_legacy(other)) { /* This is a back-compat fallback to usually do the right thing... */ - return PyArray_DTypeFromTypeNum(NPY_HALF); + PyArray_DTypeMeta *half_dt = PyArray_DTypeFromTypeNum(NPY_HALF); + PyArray_DTypeMeta *res = NPY_DT_CALL_common_dtype(other, half_dt); + Py_DECREF(half_dt); + if (res == NULL) { + PyErr_Clear(); + } + else if (res == (PyArray_DTypeMeta *)Py_NotImplemented) { + Py_DECREF(res); + } + else { + return res; + } + /* Retry with double (the default float) */ + PyArray_DTypeMeta *double_dt = PyArray_DTypeFromTypeNum(NPY_DOUBLE); + res = NPY_DT_CALL_common_dtype(other, double_dt); + Py_DECREF(double_dt); + return res; } Py_INCREF(Py_NotImplemented); return (PyArray_DTypeMeta *)Py_NotImplemented; @@ -229,7 +277,24 @@ complex_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) } else if (NPY_DT_is_legacy(other)) { /* This is a back-compat fallback to usually do the right thing... 
*/ - return PyArray_DTypeFromTypeNum(NPY_CFLOAT); + PyArray_DTypeMeta *cfloat_dt = PyArray_DTypeFromTypeNum(NPY_CFLOAT); + PyArray_DTypeMeta *res = NPY_DT_CALL_common_dtype(other, cfloat_dt); + Py_DECREF(cfloat_dt); + if (res == NULL) { + PyErr_Clear(); + } + else if (res == (PyArray_DTypeMeta *)Py_NotImplemented) { + Py_DECREF(res); + } + else { + return res; + } + /* Retry with cdouble (the default complex) */ + PyArray_DTypeMeta *cdouble_dt = PyArray_DTypeFromTypeNum(NPY_CDOUBLE); + res = NPY_DT_CALL_common_dtype(other, cdouble_dt); + Py_DECREF(cdouble_dt); + return res; + } else if (other == &PyArray_PyIntAbstractDType || other == &PyArray_PyFloatAbstractDType) { diff --git a/numpy/core/src/multiarray/abstractdtypes.h b/numpy/core/src/multiarray/abstractdtypes.h index 42c192cac..6901ec213 100644 --- a/numpy/core/src/multiarray/abstractdtypes.h +++ b/numpy/core/src/multiarray/abstractdtypes.h @@ -1,6 +1,7 @@ #ifndef NUMPY_CORE_SRC_MULTIARRAY_ABSTRACTDTYPES_H_ #define NUMPY_CORE_SRC_MULTIARRAY_ABSTRACTDTYPES_H_ +#include "arrayobject.h" #include "dtypemeta.h" @@ -16,4 +17,56 @@ NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyComplexAbstractDType; NPY_NO_EXPORT int initialize_and_map_pytypes_to_dtypes(void); + +/* + * When we get a Python int, float, or complex, we may have to use weak + * promotion logic. + * To implement this, we sometimes have to tag the converted (temporary) + * array when the original object was a Python scalar. + * + * @param obj The original Python object. + * @param arr The array into which the Python object was converted. + * @param[in,out] **dtype A pointer to the array's DType, if not NULL it will be + * replaced with the abstract DType. + * @return 0 if the `obj` was not a python scalar, and 1 if it was. + */ +static NPY_INLINE int +npy_mark_tmp_array_if_pyscalar( + PyObject *obj, PyArrayObject *arr, PyArray_DTypeMeta **dtype) +{ + /* + * We check the array dtype for two reasons: First, booleans are + * integer subclasses. Second, an int, float, or complex could have + * a custom DType registered, and then we should use that. + * Further, `np.float64` is a double subclass, so must reject it. 
+ */ + if (PyLong_Check(obj) + && (PyArray_ISINTEGER(arr) || PyArray_ISOBJECT(arr))) { + ((PyArrayObject_fields *)arr)->flags |= NPY_ARRAY_WAS_PYTHON_INT; + if (dtype != NULL) { + Py_INCREF(&PyArray_PyIntAbstractDType); + Py_SETREF(*dtype, &PyArray_PyIntAbstractDType); + } + return 1; + } + else if (PyFloat_Check(obj) && !PyArray_IsScalar(obj, Double) + && PyArray_TYPE(arr) == NPY_DOUBLE) { + ((PyArrayObject_fields *)arr)->flags |= NPY_ARRAY_WAS_PYTHON_FLOAT; + if (dtype != NULL) { + Py_INCREF(&PyArray_PyFloatAbstractDType); + Py_SETREF(*dtype, &PyArray_PyFloatAbstractDType); + } + return 1; + } + else if (PyComplex_Check(obj) && PyArray_TYPE(arr) == NPY_CDOUBLE) { + ((PyArrayObject_fields *)arr)->flags |= NPY_ARRAY_WAS_PYTHON_COMPLEX; + if (dtype != NULL) { + Py_INCREF(&PyArray_PyComplexAbstractDType); + Py_SETREF(*dtype, &PyArray_PyComplexAbstractDType); + } + return 1; + } + return 0; +} + #endif /* NUMPY_CORE_SRC_MULTIARRAY_ABSTRACTDTYPES_H_ */ diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index d18fe1b10..b1302738d 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -1261,7 +1261,8 @@ array_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) descr = NULL; goto fail; } - if (PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT)) { + /* Logic shared by `empty`, `empty_like`, and `ndarray.__new__` */ + if (PyDataType_REFCHK(PyArray_DESCR(ret))) { /* place Py_None in object positions */ PyArray_FillObjectArray(ret, Py_None); if (PyErr_Occurred()) { diff --git a/numpy/core/src/multiarray/arrayobject.h b/numpy/core/src/multiarray/arrayobject.h index fb9b0bd81..f7d0734db 100644 --- a/numpy/core/src/multiarray/arrayobject.h +++ b/numpy/core/src/multiarray/arrayobject.h @@ -26,4 +26,20 @@ array_might_be_written(PyArrayObject *obj); */ static const int NPY_ARRAY_WARN_ON_WRITE = (1 << 31); + +/* + * These flags are used internally to indicate an array that was previously + * a Python scalar (int, float, complex). The dtype of such an array should + * be considered as any integer, floating, or complex rather than the explicit + * dtype attached to the array. + * + * These flags must only be used in local context when the array in question + * is not returned. Use three flags, to avoid having to double check the + * actual dtype when the flags are used. 
+ */ +static const int NPY_ARRAY_WAS_PYTHON_INT = (1 << 30); +static const int NPY_ARRAY_WAS_PYTHON_FLOAT = (1 << 29); +static const int NPY_ARRAY_WAS_PYTHON_COMPLEX = (1 << 28); +static const int NPY_ARRAY_WAS_PYTHON_LITERAL = (1 << 30 | 1 << 29 | 1 << 28); + #endif /* NUMPY_CORE_SRC_MULTIARRAY_ARRAYOBJECT_H_ */ diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index a9f8dfdd2..7cd80ba9a 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -394,19 +394,9 @@ static int /* Overflow could have occured converting double to float */ if (NPY_UNLIKELY((npy_isinf(temp.real) && !npy_isinf(oop.real)) || (npy_isinf(temp.imag) && !npy_isinf(oop.imag)))) { - int bufsize, errmask; - PyObject *errobj; - - if (PyUFunc_GetPyValues("assignment", &bufsize, &errmask, - &errobj) < 0) { - return -1; - } - int first = 1; - if (PyUFunc_handlefperr(errmask, errobj, NPY_FPE_OVERFLOW, &first)) { - Py_XDECREF(errobj); + if (PyUFunc_GiveFloatingpointErrors("cast", NPY_FPE_OVERFLOW) < 0) { return -1; } - Py_XDECREF(errobj); } #endif } @@ -716,6 +706,7 @@ OBJECT_getitem(void *ip, void *NPY_UNUSED(ap)) PyObject *obj; memcpy(&obj, ip, sizeof(obj)); if (obj == NULL) { + /* We support NULL, but still try to guarantee this never happens! */ Py_RETURN_NONE; } else { @@ -733,6 +724,7 @@ OBJECT_setitem(PyObject *op, void *ov, void *NPY_UNUSED(ap)) memcpy(&obj, ov, sizeof(obj)); Py_INCREF(op); + /* A newly created array/buffer may only be NULLed, so XDECREF */ Py_XDECREF(obj); memcpy(ov, &op, sizeof(op)); diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index bc8a3bf88..c578a1b44 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -31,6 +31,7 @@ #include "array_method.h" #include "usertypes.h" #include "dtype_transfer.h" +#include "arrayobject.h" /* @@ -44,6 +45,11 @@ */ NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[] = {0, 3, 5, 10, 10, 20, 20, 20, 20}; +/* + * Whether or not legacy value-based promotion/casting is used. + */ +NPY_NO_EXPORT int npy_promotion_state = NPY_USE_LEGACY_PROMOTION; +NPY_NO_EXPORT PyObject *NO_NEP50_WARNING_CTX = NULL; static PyObject * PyArray_GetGenericToVoidCastingImpl(void); @@ -58,6 +64,77 @@ static PyObject * PyArray_GetObjectToGenericCastingImpl(void); +/* + * Return 1 if promotion warnings should be given and 0 if they are currently + * suppressed in the local context. + */ +NPY_NO_EXPORT int +npy_give_promotion_warnings(void) +{ + PyObject *val; + + npy_cache_import( + "numpy.core._ufunc_config", "NO_NEP50_WARNING", + &NO_NEP50_WARNING_CTX); + if (NO_NEP50_WARNING_CTX == NULL) { + PyErr_WriteUnraisable(NULL); + return 1; + } + + if (PyContextVar_Get(NO_NEP50_WARNING_CTX, Py_False, &val) < 0) { + /* Errors should not really happen, but if it does assume we warn. 
*/ + PyErr_WriteUnraisable(NULL); + return 1; + } + Py_DECREF(val); + /* only when the no-warnings context is false, we give warnings */ + return val == Py_False; +} + + +NPY_NO_EXPORT PyObject * +npy__get_promotion_state(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(arg)) { + if (npy_promotion_state == NPY_USE_WEAK_PROMOTION) { + return PyUnicode_FromString("weak"); + } + else if (npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN) { + return PyUnicode_FromString("weak_and_warn"); + } + else if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION) { + return PyUnicode_FromString("legacy"); + } + PyErr_SetString(PyExc_SystemError, "invalid promotion state!"); + return NULL; +} + + +NPY_NO_EXPORT PyObject * +npy__set_promotion_state(PyObject *NPY_UNUSED(mod), PyObject *arg) +{ + if (!PyUnicode_Check(arg)) { + PyErr_SetString(PyExc_TypeError, + "_set_promotion_state() argument or NPY_PROMOTION_STATE " + "must be a string."); + return NULL; + } + if (PyUnicode_CompareWithASCIIString(arg, "weak") == 0) { + npy_promotion_state = NPY_USE_WEAK_PROMOTION; + } + else if (PyUnicode_CompareWithASCIIString(arg, "weak_and_warn") == 0) { + npy_promotion_state = NPY_USE_WEAK_PROMOTION_AND_WARN; + } + else if (PyUnicode_CompareWithASCIIString(arg, "legacy") == 0) { + npy_promotion_state = NPY_USE_LEGACY_PROMOTION; + } + else { + PyErr_Format(PyExc_TypeError, + "_set_promotion_state() argument or NPY_PROMOTION_STATE must be " + "'weak', 'legacy', or 'weak_and_warn' but got '%.100S'", arg); + return NULL; + } + Py_RETURN_NONE; +} + /** * Fetch the casting implementation from one DType to another. * @@ -776,6 +853,55 @@ can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data, return ret; } + +NPY_NO_EXPORT npy_bool +can_cast_pyscalar_scalar_to( + int flags, PyArray_Descr *to, NPY_CASTING casting) +{ + /* + * This function only works reliably for legacy (NumPy dtypes). + * If we end up here for a non-legacy DType, it is a bug. + */ + assert(NPY_DT_is_legacy(NPY_DTYPE(to))); + + /* + * Quickly check for the typical numeric cases, where the casting rules + * can be hardcoded fairly easily. + */ + if (PyDataType_ISCOMPLEX(to)) { + return 1; + } + else if (PyDataType_ISFLOAT(to)) { + if (flags & NPY_ARRAY_WAS_PYTHON_COMPLEX) { + return casting == NPY_UNSAFE_CASTING; + } + return 1; + } + else if (PyDataType_ISINTEGER(to)) { + if (!(flags & NPY_ARRAY_WAS_PYTHON_INT)) { + return casting == NPY_UNSAFE_CASTING; + } + return 1; + } + + /* + * For all other cases we use the default dtype. + */ + PyArray_Descr *from; + if (flags & NPY_ARRAY_WAS_PYTHON_INT) { + from = PyArray_DescrFromType(NPY_LONG); + } + else if (flags & NPY_ARRAY_WAS_PYTHON_FLOAT) { + from = PyArray_DescrFromType(NPY_DOUBLE); + } + else { + from = PyArray_DescrFromType(NPY_CDOUBLE); + } + int res = PyArray_CanCastTypeTo(from, to, casting); + Py_DECREF(from); + return res; +} + /*NUMPY_API * Returns 1 if the array object may be cast to the given data type using * the casting rule, 0 otherwise. This differs from PyArray_CanCastTo in @@ -794,12 +920,25 @@ PyArray_CanCastArrayTo(PyArrayObject *arr, PyArray_Descr *to, to = NULL; } - /* - * If it's a scalar, check the value. (This only currently matters for - * numeric types and for `to == NULL` it can't be numeric.) - */ - if (PyArray_NDIM(arr) == 0 && !PyArray_HASFIELDS(arr) && to != NULL) { - return can_cast_scalar_to(from, PyArray_DATA(arr), to, casting); + if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION) { + /* + * If it's a scalar, check the value. 
(This only currently matters for + * numeric types and for `to == NULL` it can't be numeric.) + */ + if (PyArray_NDIM(arr) == 0 && !PyArray_HASFIELDS(arr) && to != NULL) { + return can_cast_scalar_to(from, PyArray_DATA(arr), to, casting); + } + } + else { + /* + * If it's a scalar, check the value. (This only currently matters for + * numeric types and for `to == NULL` it can't be numeric.) + */ + if (PyArray_FLAGS(arr) & NPY_ARRAY_WAS_PYTHON_LITERAL && to != NULL) { + return can_cast_pyscalar_scalar_to( + PyArray_FLAGS(arr) & NPY_ARRAY_WAS_PYTHON_LITERAL, to, + casting); + } } /* Otherwise, use the standard rules (same as `PyArray_CanCastTypeTo`) */ @@ -1561,40 +1700,44 @@ should_use_min_scalar(npy_intp narrs, PyArrayObject **arr, } -/* - * Utility function used only in PyArray_ResultType for value-based logic. - * See that function for the meaning and contents of the parameters. - */ -static PyArray_Descr * -get_descr_from_cast_or_value( - npy_intp i, - PyArrayObject *arrs[], - npy_intp ndtypes, - PyArray_Descr *descriptor, - PyArray_DTypeMeta *common_dtype) -{ - PyArray_Descr *curr; - if (NPY_LIKELY(i < ndtypes || - !(PyArray_FLAGS(arrs[i-ndtypes]) & _NPY_ARRAY_WAS_PYSCALAR))) { - curr = PyArray_CastDescrToDType(descriptor, common_dtype); - } - else { - /* - * Unlike `PyArray_CastToDTypeAndPromoteDescriptors`, deal with - * plain Python values "graciously". This recovers the original - * value the long route, but it should almost never happen... - */ - PyObject *tmp = PyArray_GETITEM(arrs[i-ndtypes], - PyArray_BYTES(arrs[i-ndtypes])); - if (tmp == NULL) { - return NULL; +NPY_NO_EXPORT int +should_use_min_scalar_weak_literals(int narrs, PyArrayObject **arr) { + int all_scalars = 1; + int max_scalar_kind = -1; + int max_array_kind = -1; + + for (int i = 0; i < narrs; i++) { + if (PyArray_FLAGS(arr[i]) & NPY_ARRAY_WAS_PYTHON_INT) { + /* A Python integer could be `u` so is effectively that: */ + int new = dtype_kind_to_simplified_ordering('u'); + if (new > max_scalar_kind) { + max_scalar_kind = new; + } } - curr = NPY_DT_CALL_discover_descr_from_pyobject(common_dtype, tmp); - Py_DECREF(tmp); + /* For the new logic, only complex or not matters: */ + else if (PyArray_FLAGS(arr[i]) & NPY_ARRAY_WAS_PYTHON_FLOAT) { + max_scalar_kind = dtype_kind_to_simplified_ordering('f'); + } + else if (PyArray_FLAGS(arr[i]) & NPY_ARRAY_WAS_PYTHON_COMPLEX) { + max_scalar_kind = dtype_kind_to_simplified_ordering('f'); + } + else { + all_scalars = 0; + int kind = dtype_kind_to_simplified_ordering( + PyArray_DESCR(arr[i])->kind); + if (kind > max_array_kind) { + max_array_kind = kind; + } + } + } + if (!all_scalars && max_array_kind >= max_scalar_kind) { + return 1; } - return curr; + + return 0; } + /*NUMPY_API * * Produces the result type of a bunch of inputs, using the same rules @@ -1667,30 +1810,13 @@ PyArray_ResultType( at_least_one_scalar = 1; } - if (!(PyArray_FLAGS(arrs[i]) & _NPY_ARRAY_WAS_PYSCALAR)) { - /* This was not a scalar with an abstract DType */ - all_descriptors[i_all] = PyArray_DTYPE(arrs[i]); - all_DTypes[i_all] = NPY_DTYPE(all_descriptors[i_all]); - Py_INCREF(all_DTypes[i_all]); - all_pyscalar = 0; - continue; - } - /* - * The original was a Python scalar with an abstract DType. - * In a future world, this type of code may need to work on the - * DType level first and discover those from the original value. - * But, right now we limit the logic to int, float, and complex - * and do it here to allow for a transition without losing all of - * our remaining sanity. 
+ * If the original was a Python scalar/literal, we use only the + * corresponding abstract DType (and no descriptor) below. + * Otherwise, we propagate the descriptor as well. */ - if (PyArray_ISFLOAT(arrs[i])) { - all_DTypes[i_all] = &PyArray_PyFloatAbstractDType; - } - else if (PyArray_ISCOMPLEX(arrs[i])) { - all_DTypes[i_all] = &PyArray_PyComplexAbstractDType; - } - else { + all_descriptors[i_all] = NULL; /* no descriptor for py-scalars */ + if (PyArray_FLAGS(arrs[i]) & NPY_ARRAY_WAS_PYTHON_INT) { /* This could even be an object dtype here for large ints */ all_DTypes[i_all] = &PyArray_PyIntAbstractDType; if (PyArray_TYPE(arrs[i]) != NPY_LONG) { @@ -1698,12 +1824,18 @@ PyArray_ResultType( all_pyscalar = 0; } } + else if (PyArray_FLAGS(arrs[i]) & NPY_ARRAY_WAS_PYTHON_FLOAT) { + all_DTypes[i_all] = &PyArray_PyFloatAbstractDType; + } + else if (PyArray_FLAGS(arrs[i]) & NPY_ARRAY_WAS_PYTHON_COMPLEX) { + all_DTypes[i_all] = &PyArray_PyComplexAbstractDType; + } + else { + all_descriptors[i_all] = PyArray_DTYPE(arrs[i]); + all_DTypes[i_all] = NPY_DTYPE(all_descriptors[i_all]); + all_pyscalar = 0; + } Py_INCREF(all_DTypes[i_all]); - /* - * Leave the descriptor empty, if we need it, we will have to go - * to more extreme lengths unfortunately. - */ - all_descriptors[i_all] = NULL; } PyArray_DTypeMeta *common_dtype = PyArray_PromoteDTypeSequence( @@ -1730,23 +1862,20 @@ PyArray_ResultType( * NOTE: Code duplicates `PyArray_CastToDTypeAndPromoteDescriptors`, but * supports special handling of the abstract values. */ - if (!NPY_DT_is_parametric(common_dtype)) { - /* Note that this "fast" path loses all metadata */ - result = NPY_DT_CALL_default_descr(common_dtype); - } - else { - result = get_descr_from_cast_or_value( - 0, arrs, ndtypes, all_descriptors[0], common_dtype); - if (result == NULL) { - goto error; - } - - for (npy_intp i = 1; i < ndtypes+narrs; i++) { - PyArray_Descr *curr = get_descr_from_cast_or_value( - i, arrs, ndtypes, all_descriptors[i], common_dtype); + if (NPY_DT_is_parametric(common_dtype)) { + for (npy_intp i = 0; i < ndtypes+narrs; i++) { + if (all_descriptors[i] == NULL) { + continue; /* originally a python scalar/literal */ + } + PyArray_Descr *curr = PyArray_CastDescrToDType( + all_descriptors[i], common_dtype); if (curr == NULL) { goto error; } + if (result == NULL) { + result = curr; + continue; + } Py_SETREF(result, NPY_DT_SLOTS(common_dtype)->common_instance(result, curr)); Py_DECREF(curr); if (result == NULL) { @@ -1754,27 +1883,33 @@ PyArray_ResultType( } } } + if (result == NULL) { + /* + * If the DType is not parametric, or all were weak scalars, + * a result may not yet be set. + */ + result = NPY_DT_CALL_default_descr(common_dtype); + if (result == NULL) { + goto error; + } + } /* - * Unfortunately, when 0-D "scalar" arrays are involved and mixed, we - * have to use the value-based logic. The intention is to move away from - * the complex logic arising from it. We thus fall back to the legacy - * version here. - * It may be possible to micro-optimize this to skip some of the above - * logic when this path is necessary. + * Unfortunately, when 0-D "scalar" arrays are involved and mixed, we *may* + * have to use the value-based logic. + * `PyArray_CheckLegacyResultType` may behave differently based on the + * current value of `npy_promotion_state`: + * 1. It does nothing (we use the "new" behavior) + * 2. It does nothing, but warns if the result would differ. + * 3. It replaces the result based on the legacy value-based logic.
*/ if (at_least_one_scalar && !all_pyscalar && result->type_num < NPY_NTYPES) { - PyArray_Descr *legacy_result = PyArray_LegacyResultType( - narrs, arrs, ndtypes, descrs); - if (legacy_result == NULL) { - /* - * Going from error to success should not really happen, but is - * probably OK if it does. - */ - goto error; + if (PyArray_CheckLegacyResultType( + &result, narrs, arrs, ndtypes, descrs) < 0) { + Py_DECREF(common_dtype); + Py_DECREF(result); + return NULL; } - /* Return the old "legacy" result (could warn here if different) */ - Py_SETREF(result, legacy_result); } Py_DECREF(common_dtype); @@ -1802,38 +1937,39 @@ PyArray_ResultType( * of all the inputs. Data types passed directly are treated as array * types. */ -NPY_NO_EXPORT PyArray_Descr * -PyArray_LegacyResultType( +NPY_NO_EXPORT int +PyArray_CheckLegacyResultType( + PyArray_Descr **new_result, npy_intp narrs, PyArrayObject **arr, npy_intp ndtypes, PyArray_Descr **dtypes) { + PyArray_Descr *ret = NULL; + if (npy_promotion_state == NPY_USE_WEAK_PROMOTION) { + return 0; + } + if (npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN + && !npy_give_promotion_warnings()) { + return 0; + } + npy_intp i; - /* If there's just one type, pass it through */ + /* If there's just one type, results must match */ if (narrs + ndtypes == 1) { - PyArray_Descr *ret = NULL; - if (narrs == 1) { - ret = PyArray_DESCR(arr[0]); - } - else { - ret = dtypes[0]; - } - Py_INCREF(ret); - return ret; + return 0; } int use_min_scalar = should_use_min_scalar(narrs, arr, ndtypes, dtypes); /* Loop through all the types, promoting them */ if (!use_min_scalar) { - PyArray_Descr *ret; /* Build a single array of all the dtypes */ PyArray_Descr **all_dtypes = PyArray_malloc( sizeof(*all_dtypes) * (narrs + ndtypes)); if (all_dtypes == NULL) { PyErr_NoMemory(); - return NULL; + return -1; } for (i = 0; i < narrs; ++i) { all_dtypes[i] = PyArray_DESCR(arr[i]); @@ -1843,11 +1979,9 @@ PyArray_LegacyResultType( } ret = PyArray_PromoteTypeSequence(all_dtypes, narrs + ndtypes); PyArray_free(all_dtypes); - return ret; } else { int ret_is_small_unsigned = 0; - PyArray_Descr *ret = NULL; for (i = 0; i < narrs; ++i) { int tmp_is_small_unsigned; @@ -1855,7 +1989,7 @@ PyArray_LegacyResultType( arr[i], &tmp_is_small_unsigned); if (tmp == NULL) { Py_XDECREF(ret); - return NULL; + return -1; } /* Combine it with the existing type */ if (ret == NULL) { @@ -1869,7 +2003,7 @@ PyArray_LegacyResultType( Py_DECREF(ret); ret = tmpret; if (ret == NULL) { - return NULL; + return -1; } ret_is_small_unsigned = tmp_is_small_unsigned && @@ -1890,7 +2024,7 @@ PyArray_LegacyResultType( Py_DECREF(ret); ret = tmpret; if (ret == NULL) { - return NULL; + return -1; } } } @@ -1899,9 +2033,32 @@ PyArray_LegacyResultType( PyErr_SetString(PyExc_TypeError, "no arrays or types available to calculate result type"); } + } + + if (ret == NULL) { + return -1; + } - return ret; + int unchanged_result = PyArray_EquivTypes(*new_result, ret); + if (unchanged_result) { + Py_DECREF(ret); + return 0; } + if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION) { + Py_SETREF(*new_result, ret); + return 0; + } + + assert(npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN); + if (PyErr_WarnFormat(PyExc_UserWarning, 1, + "result dtype changed due to the removal of value-based " + "promotion from NumPy. 
Changed from %S to %S.", + ret, *new_result) < 0) { + Py_DECREF(ret); + return -1; + } + Py_DECREF(ret); + return 0; } /** diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index af6d790cf..b6bc7d8a7 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -9,6 +9,21 @@ extern "C" { extern NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[]; +#define NPY_USE_LEGACY_PROMOTION 0 +#define NPY_USE_WEAK_PROMOTION 1 +#define NPY_USE_WEAK_PROMOTION_AND_WARN 2 +extern NPY_NO_EXPORT int npy_promotion_state; +extern NPY_NO_EXPORT PyObject *NO_NEP50_WARNING_CTX; + +NPY_NO_EXPORT int +npy_give_promotion_warnings(void); + +NPY_NO_EXPORT PyObject * +npy__get_promotion_state(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(arg)); + +NPY_NO_EXPORT PyObject * +npy__set_promotion_state(PyObject *NPY_UNUSED(mod), PyObject *arg); + NPY_NO_EXPORT PyObject * PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to); @@ -24,8 +39,9 @@ PyArray_ObjectType(PyObject *op, int minimum_type); NPY_NO_EXPORT PyArrayObject ** PyArray_ConvertToCommonType(PyObject *op, int *retn); -NPY_NO_EXPORT PyArray_Descr * -PyArray_LegacyResultType( +NPY_NO_EXPORT int +PyArray_CheckLegacyResultType( + PyArray_Descr **new_result, npy_intp narrs, PyArrayObject **arr, npy_intp ndtypes, PyArray_Descr **dtypes); @@ -40,10 +56,17 @@ NPY_NO_EXPORT npy_bool can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data, PyArray_Descr *to, NPY_CASTING casting); +NPY_NO_EXPORT npy_bool +can_cast_pyscalar_scalar_to( + int flags, PyArray_Descr *to, NPY_CASTING casting); + NPY_NO_EXPORT int should_use_min_scalar(npy_intp narrs, PyArrayObject **arr, npy_intp ndtypes, PyArray_Descr **dtypes); +NPY_NO_EXPORT int +should_use_min_scalar_weak_literals(int narrs, PyArrayObject **arr); + NPY_NO_EXPORT const char * npy_casting_to_string(NPY_CASTING casting); diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index c3d66dd6b..ebd990724 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1068,6 +1068,18 @@ PyArray_NewLikeArrayWithShape(PyArrayObject *prototype, NPY_ORDER order, 0, subok ? (PyObject *)prototype : NULL); } + if (ret == NULL) { + return NULL; + } + + /* Logic shared by `empty`, `empty_like`, and `ndarray.__new__` */ + if (PyDataType_REFCHK(PyArray_DESCR((PyArrayObject *)ret))) { + PyArray_FillObjectArray((PyArrayObject *)ret, Py_None); + if (PyErr_Occurred()) { + Py_DECREF(ret); + return NULL; + } + } return ret; } @@ -2979,22 +2991,23 @@ PyArray_Empty(int nd, npy_intp const *dims, PyArray_Descr *type, int is_f_order) * PyArray_NewFromDescr steals a ref, * but we need to look at type later. 
* */ - Py_INCREF(type); - ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, type, nd, dims, NULL, NULL, is_f_order, NULL); - if (ret != NULL && PyDataType_REFCHK(type)) { + if (ret == NULL) { + return NULL; + } + + /* Logic shared by `empty`, `empty_like`, and `ndarray.__new__` */ + if (PyDataType_REFCHK(PyArray_DESCR(ret))) { PyArray_FillObjectArray(ret, Py_None); if (PyErr_Occurred()) { Py_DECREF(ret); - Py_DECREF(type); return NULL; } } - Py_DECREF(type); return (PyObject *)ret; } diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 99096be56..70b54f26a 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -3904,7 +3904,8 @@ datetime_to_timedelta_resolve_descriptors( PyArrayMethodObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], PyArray_Descr *given_descrs[2], - PyArray_Descr *loop_descrs[2]) + PyArray_Descr *loop_descrs[2], + npy_intp *NPY_UNUSED(view_offset)) { loop_descrs[0] = NPY_DT_CALL_ensure_canonical(given_descrs[0]); if (loop_descrs[0] == NULL) { diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 96d0c893d..16069d619 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -3537,10 +3537,11 @@ array_result_type(PyObject *NPY_UNUSED(dummy), PyObject *args) if (arr[narr] == NULL) { goto finish; } - if (PyLong_CheckExact(obj) || PyFloat_CheckExact(obj) || - PyComplex_CheckExact(obj)) { - ((PyArrayObject_fields *)arr[narr])->flags |= _NPY_ARRAY_WAS_PYSCALAR; - } + /* + * Mark array if it was a python scalar (we do not need the actual + * DType here yet, this is figured out inside ResultType). + */ + npy_mark_tmp_array_if_pyscalar(obj, arr[narr], NULL); ++narr; } else { @@ -4494,6 +4495,14 @@ static struct PyMethodDef array_module_methods[] = { {"get_handler_version", (PyCFunction) get_handler_version, METH_VARARGS, NULL}, + {"_get_promotion_state", + (PyCFunction)npy__get_promotion_state, + METH_NOARGS, "Get the current NEP 50 promotion state."}, + {"_set_promotion_state", + (PyCFunction)npy__set_promotion_state, + METH_O, "Set the NEP 50 promotion state.
This is not thread-safe.\n" + "The optional warnings can be safely silenced using the \n" + "`np._no_nep50_warning()` context manager."}, {"_add_newdoc_ufunc", (PyCFunction)add_newdoc_ufunc, METH_VARARGS, NULL}, {"_get_sfloat_dtype", diff --git a/numpy/core/src/npysort/binsearch.cpp b/numpy/core/src/npysort/binsearch.cpp index 98d305910..ea07bbb0c 100644 --- a/numpy/core/src/npysort/binsearch.cpp +++ b/numpy/core/src/npysort/binsearch.cpp @@ -330,9 +330,9 @@ struct binsearch_t : binsearch_base<arg> { npy::bool_tag, npy::byte_tag, npy::ubyte_tag, npy::short_tag, npy::ushort_tag, npy::int_tag, npy::uint_tag, npy::long_tag, npy::ulong_tag, npy::longlong_tag, npy::ulonglong_tag, - npy::half_tag, npy::float_tag, npy::double_tag, - npy::longdouble_tag, npy::cfloat_tag, npy::cdouble_tag, - npy::clongdouble_tag, npy::datetime_tag, npy::timedelta_tag>; + npy::float_tag, npy::double_tag, npy::longdouble_tag, + npy::cfloat_tag, npy::cdouble_tag, npy::clongdouble_tag, + npy::datetime_tag, npy::timedelta_tag, npy::half_tag>; static constexpr std::array<value_type, taglist::size> map = make_binsearch_map(taglist()); diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c index 620335d88..5aecdd1fc 100644 --- a/numpy/core/src/umath/dispatching.c +++ b/numpy/core/src/umath/dispatching.c @@ -40,6 +40,7 @@ #define PY_SSIZE_T_CLEAN #include <Python.h> +#include <convert_datatype.h> #include "numpy/ndarraytypes.h" #include "common.h" @@ -595,7 +596,8 @@ _make_new_typetup( static int legacy_promote_using_legacy_type_resolver(PyUFuncObject *ufunc, PyArrayObject *const *ops, PyArray_DTypeMeta *signature[], - PyArray_DTypeMeta *operation_DTypes[], int *out_cacheable) + PyArray_DTypeMeta *operation_DTypes[], int *out_cacheable, + npy_bool check_only) { int nargs = ufunc->nargs; PyArray_Descr *out_descrs[NPY_MAXARGS] = {NULL}; @@ -623,6 +625,42 @@ legacy_promote_using_legacy_type_resolver(PyUFuncObject *ufunc, } Py_XDECREF(type_tuple); + if (NPY_UNLIKELY(check_only)) { + /* + * When warnings are enabled, we don't replace the DTypes, but only + * check whether the old result is the same as the new one. + * For noise reason, we do this only on the *output* dtypes which + * ignores floating point precision changes for comparisons such as + * `np.float32(3.1) < 3.1`. + */ + for (int i = ufunc->nin; i < ufunc->nargs; i++) { + /* + * If an output was provided and the new dtype matches, we + * should (at best) lose a tiny bit of precision, e.g.: + * `np.true_divide(float32_arr0d, 1, out=float32_arr0d)` + * (which operated on float64 before, although it is probably rare) + */ + if (ops[i] != NULL + && PyArray_EquivTypenums( + operation_DTypes[i]->type_num, + PyArray_DESCR(ops[i])->type_num)) { + continue; + } + /* Otherwise, warn if the dtype doesn't match */ + if (!PyArray_EquivTypenums( + operation_DTypes[i]->type_num, out_descrs[i]->type_num)) { + if (PyErr_WarnFormat(PyExc_UserWarning, 1, + "result dtype changed due to the removal of value-based " + "promotion from NumPy. 
Changed from %S to %S.", + out_descrs[i], operation_DTypes[i]->singleton) < 0) { + return -1; + } + return 0; + } + } + return 0; + } + for (int i = 0; i < nargs; i++) { Py_XSETREF(operation_DTypes[i], NPY_DTYPE(out_descrs[i])); Py_INCREF(operation_DTypes[i]); @@ -773,7 +811,7 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc, PyArray_DTypeMeta *new_op_dtypes[NPY_MAXARGS] = {NULL}; int cacheable = 1; /* TODO: only the comparison deprecation needs this */ if (legacy_promote_using_legacy_type_resolver(ufunc, - ops, signature, new_op_dtypes, &cacheable) < 0) { + ops, signature, new_op_dtypes, &cacheable, NPY_FALSE) < 0) { return NULL; } info = promote_and_get_info_and_ufuncimpl(ufunc, @@ -852,6 +890,11 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc, * these including clearing the output. * @param force_legacy_promotion If set, we have to use the old type resolution * to implement value-based promotion/casting. + * @param promoting_pyscalars Indication that some of the initial inputs were + * int, float, or complex. In this case weak-scalar promotion is used + * which can lead to a lower result precision even when legacy promotion + * does not kick in: `np.int8(1) + 1` is the example. + * (Legacy promotion is skipped because `np.int8(1)` is also scalar) * @param ensure_reduce_compatible Must be set for reductions, in which case * the found implementation is checked for reduce-like compatibility. * If it is *not* compatible and `signature[2] != NULL`, we assume its @@ -867,6 +910,7 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], npy_bool force_legacy_promotion, npy_bool allow_legacy_promotion, + npy_bool promoting_pyscalars, npy_bool ensure_reduce_compatible) { int nin = ufunc->nin, nargs = ufunc->nargs; @@ -896,7 +940,8 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, } } - if (force_legacy_promotion) { + if (force_legacy_promotion + && npy_promotion_state == NPY_USE_LEGACY_PROMOTION) { /* * We must use legacy promotion for value-based logic. Call the old * resolver once up-front to get the "actual" loop dtypes. 
@@ -904,13 +949,17 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, */ int cacheable = 1; /* unused, as we modify the original `op_dtypes` */ if (legacy_promote_using_legacy_type_resolver(ufunc, - ops, signature, op_dtypes, &cacheable) < 0) { + ops, signature, op_dtypes, &cacheable, NPY_FALSE) < 0) { return NULL; } } + /* Pause warnings and always use "new" path */ + int old_promotion_state = npy_promotion_state; + npy_promotion_state = NPY_USE_WEAK_PROMOTION; PyObject *info = promote_and_get_info_and_ufuncimpl(ufunc, ops, signature, op_dtypes, allow_legacy_promotion); + npy_promotion_state = old_promotion_state; if (info == NULL) { if (!PyErr_Occurred()) { @@ -920,6 +969,27 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, } PyArrayMethodObject *method = (PyArrayMethodObject *)PyTuple_GET_ITEM(info, 1); + PyObject *all_dtypes = PyTuple_GET_ITEM(info, 0); + + /* If necessary, check if the old result would have been different */ + if (NPY_UNLIKELY(npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN) + && (force_legacy_promotion || promoting_pyscalars) + && npy_give_promotion_warnings()) { + PyArray_DTypeMeta *check_dtypes[NPY_MAXARGS]; + for (int i = 0; i < nargs; i++) { + check_dtypes[i] = (PyArray_DTypeMeta *)PyTuple_GET_ITEM( + all_dtypes, i); + } + /* Before calling to the legacy promotion, pretend that is the state: */ + npy_promotion_state = NPY_USE_LEGACY_PROMOTION; + int res = legacy_promote_using_legacy_type_resolver(ufunc, + ops, signature, check_dtypes, NULL, NPY_TRUE); + /* Reset the promotion state: */ + npy_promotion_state = NPY_USE_WEAK_PROMOTION_AND_WARN; + if (res < 0) { + return NULL; + } + } /* * In certain cases (only the logical ufuncs really), the loop we found may @@ -931,14 +1001,14 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, * comment. That could be relaxed, in which case we may need to * cache if a call was for a reduction. */ - PyObject *all_dtypes = PyTuple_GET_ITEM(info, 0); if (ensure_reduce_compatible && signature[0] == NULL && PyTuple_GET_ITEM(all_dtypes, 0) != PyTuple_GET_ITEM(all_dtypes, 2)) { signature[0] = (PyArray_DTypeMeta *)PyTuple_GET_ITEM(all_dtypes, 2); Py_INCREF(signature[0]); return promote_and_get_ufuncimpl(ufunc, ops, signature, op_dtypes, - force_legacy_promotion, allow_legacy_promotion, NPY_FALSE); + force_legacy_promotion, allow_legacy_promotion, + promoting_pyscalars, NPY_FALSE); } for (int i = 0; i < nargs; i++) { diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h index f2ab0be2e..513b50d75 100644 --- a/numpy/core/src/umath/dispatching.h +++ b/numpy/core/src/umath/dispatching.h @@ -27,6 +27,7 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], npy_bool force_legacy_promotion, npy_bool allow_legacy_promotion, + npy_bool promote_pyscalars, npy_bool ensure_reduce_compatible); NPY_NO_EXPORT PyObject * diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 9ae686399..e5104db81 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -214,6 +214,8 @@ PyUFunc_O_O(char **args, npy_intp const *dimensions, npy_intp const *steps, void UNARY_LOOP { PyObject *in1 = *(PyObject **)ip1; PyObject **out = (PyObject **)op1; + /* We allow NULL, but try to guarantee non-NULL to downstream */ + assert(in1 != NULL); PyObject *ret = f(in1 ? 
in1 : Py_None); if (ret == NULL) { return; @@ -231,6 +233,8 @@ PyUFunc_O_O_method(char **args, npy_intp const *dimensions, npy_intp const *step UNARY_LOOP { PyObject *in1 = *(PyObject **)ip1; PyObject **out = (PyObject **)op1; + /* We allow NULL, but try to guarantee non-NULL to downstream */ + assert(in1 != NULL); PyObject *ret, *func; func = PyObject_GetAttrString(in1 ? in1 : Py_None, meth); if (func != NULL && !PyCallable_Check(func)) { @@ -268,6 +272,9 @@ PyUFunc_OO_O(char **args, npy_intp const *dimensions, npy_intp const *steps, voi PyObject *in1 = *(PyObject **)ip1; PyObject *in2 = *(PyObject **)ip2; PyObject **out = (PyObject **)op1; + /* We allow NULL, but try to guarantee non-NULL to downstream */ + assert(in1 != NULL); + assert(in2 != NULL); PyObject *ret = f(in1 ? in1 : Py_None, in2 ? in2 : Py_None); if (ret == NULL) { return; @@ -286,6 +293,10 @@ PyUFunc_OOO_O(char **args, npy_intp const *dimensions, npy_intp const *steps, vo PyObject *in2 = *(PyObject **)ip2; PyObject *in3 = *(PyObject **)ip3; PyObject **out = (PyObject **)op1; + /* We allow NULL, but try to guarantee non-NULL to downstream */ + assert(in1 != NULL); + assert(in2 != NULL); + assert(in3 != NULL); PyObject *ret = f( in1 ? in1 : Py_None, in2 ? in2 : Py_None, @@ -308,6 +319,9 @@ PyUFunc_OO_O_method(char **args, npy_intp const *dimensions, npy_intp const *ste PyObject *in1 = *(PyObject **)ip1; PyObject *in2 = *(PyObject **)ip2; PyObject **out = (PyObject **)op1; + /* We allow NULL, but try to guarantee non-NULL to downstream */ + assert(in1 != NULL); + assert(in2 != NULL); PyObject *ret = PyObject_CallMethod(in1 ? in1 : Py_None, meth, "(O)", in2); if (ret == NULL) { @@ -349,6 +363,8 @@ PyUFunc_On_Om(char **args, npy_intp const *dimensions, npy_intp const *steps, vo } for(j = 0; j < nin; j++) { in = *((PyObject **)ptrs[j]); + /* We allow NULL, but try to guarantee non-NULL to downstream */ + assert(in != NULL); if (in == NULL) { in = Py_None; } diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index ef608378a..c322ca33d 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -29,6 +29,8 @@ #include "array_coercion.h" #include "common.h" #include "can_cast_table.h" +#include "umathmodule.h" + /* TODO: Used for some functions, should possibly move these to npy_math.h */ #include "loops.h" @@ -1162,6 +1164,14 @@ convert_to_@name@(PyObject *value, @type@ *result, npy_bool *may_need_deferring) * (Half, Float, Double, LongDouble)*3# */ #define IS_@name@ +/* drop the "true_" from "true_divide" for floating point warnings: */ +#define IS_@oper@ +#ifdef IS_true_divide + #define OP_NAME "divide" +#else + #define OP_NAME "@oper@" +#endif +#undef IS_@oper@ static PyObject * @name@_@oper@(PyObject *a, PyObject *b) @@ -1281,19 +1291,9 @@ static PyObject * retstatus |= npy_get_floatstatus_barrier((char*)&out); #endif if (retstatus) { - int bufsize, errmask; - PyObject *errobj; - - if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask, - &errobj) < 0) { - return NULL; - } - int first = 1; - if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) { - Py_XDECREF(errobj); + if (PyUFunc_GiveFloatingpointErrors("scalar " OP_NAME, retstatus) < 0) { return NULL; } - Py_XDECREF(errobj); } @@ -1327,6 +1327,7 @@ static PyObject * } +#undef OP_NAME #undef IS_@name@ /**end repeat**/ @@ -1449,19 +1450,9 @@ static PyObject * retstatus |= npy_get_floatstatus_barrier((char*)&out); #endif if (retstatus) { - int bufsize, errmask; - PyObject *errobj; - - if 
(PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask, - &errobj) < 0) { - return NULL; - } - int first = 1; - if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) { - Py_XDECREF(errobj); + if (PyUFunc_GiveFloatingpointErrors("scalar power", retstatus) < 0) { return NULL; } - Py_XDECREF(errobj); } ret = PyArrayScalar_New(@Name@); @@ -1581,19 +1572,9 @@ static PyObject * int retstatus = @name@_ctype_@oper@(val, &out); if (retstatus) { - int bufsize, errmask; - PyObject *errobj; - - if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask, - &errobj) < 0) { - return NULL; - } - int first = 1; - if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) { - Py_XDECREF(errobj); + if (PyUFunc_GiveFloatingpointErrors("scalar @oper@", retstatus) < 0) { return NULL; } - Py_XDECREF(errobj); } /* diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 2636396d3..b7e390abb 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -49,6 +49,8 @@ #include "override.h" #include "npy_import.h" #include "extobj.h" + +#include "arrayobject.h" #include "common.h" #include "dtypemeta.h" #include "numpyos.h" @@ -945,6 +947,7 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, ufunc_full_args full_args, PyArrayObject *out_op[], PyArray_DTypeMeta *out_op_DTypes[], npy_bool *force_legacy_promotion, npy_bool *allow_legacy_promotion, + npy_bool *promoting_pyscalars, PyObject *order_obj, NPY_ORDER *out_order, PyObject *casting_obj, NPY_CASTING *out_casting, PyObject *subok_obj, npy_bool *out_subok, @@ -961,6 +964,7 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, npy_bool any_scalar = NPY_FALSE; *allow_legacy_promotion = NPY_TRUE; *force_legacy_promotion = NPY_FALSE; + *promoting_pyscalars = NPY_FALSE; for (int i = 0; i < nin; i++) { obj = PyTuple_GET_ITEM(full_args.in, i); @@ -980,6 +984,8 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, if (!NPY_DT_is_legacy(out_op_DTypes[i])) { *allow_legacy_promotion = NPY_FALSE; + // TODO: A subclass of int, float, complex could reach here and + // it should not be flagged as "weak" if it does. } if (PyArray_NDIM(out_op[i]) == 0) { any_scalar = NPY_TRUE; @@ -988,17 +994,24 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, all_scalar = NPY_FALSE; continue; } + + // TODO: Is this equivalent/better by removing the logic which enforces + // that we always use weak promotion in the core? + if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION) { + continue; /* Skip use of special dtypes */ + } + /* - * TODO: we need to special case scalars here, if the input is a - * Python int, float, or complex, we have to use the "weak" - * DTypes: `PyArray_PyIntAbstractDType`, etc. - * This is to allow e.g. `float32(1.) + 1` to return `float32`. - * The correct array dtype can only be found after promotion for - * such a "weak scalar". We could avoid conversion here, but - * must convert it for use in the legacy promotion. - * There is still a small chance that this logic can instead - * happen inside the Python operators. + * Handle the "weak" Python scalars/literals. We use a special DType + * for these. + * Further, we mark the operation array with a special flag to indicate + * this. This is because the legacy dtype resolution makes use of + * `np.can_cast(operand, dtype)`. The flag is local to this use, but + * necessary to propagate the information to the legacy type resolution. 
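+     * (For instance, this is what keeps `float32(1.) + 1` returning
+     * `float32`: the Python scalar is marked "weak" rather than being
+     * converted at its own default precision.)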
*/ + if (npy_mark_tmp_array_if_pyscalar(obj, out_op[i], &out_op_DTypes[i])) { + *promoting_pyscalars = NPY_TRUE; + } } if (*allow_legacy_promotion && (!all_scalar && any_scalar)) { *force_legacy_promotion = should_use_min_scalar(nin, out_op, 0, NULL); @@ -2768,7 +2781,8 @@ reducelike_promote_and_resolve(PyUFuncObject *ufunc, } PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, - ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE, NPY_TRUE); + ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE, + NPY_FALSE, NPY_TRUE); if (evil_ndim_mutating_hack) { ((PyArrayObject_fields *)out)->nd = 0; } @@ -4875,10 +4889,13 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, int keepdims = -1; /* We need to know if it was passed */ npy_bool force_legacy_promotion; npy_bool allow_legacy_promotion; + npy_bool promoting_pyscalars; if (convert_ufunc_arguments(ufunc, /* extract operand related information: */ full_args, operands, - operand_DTypes, &force_legacy_promotion, &allow_legacy_promotion, + operand_DTypes, + &force_legacy_promotion, &allow_legacy_promotion, + &promoting_pyscalars, /* extract general information: */ order_obj, &order, casting_obj, &casting, @@ -4899,7 +4916,7 @@ ufunc_generic_fastcall(PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, operands, signature, operand_DTypes, force_legacy_promotion, allow_legacy_promotion, - NPY_FALSE); + promoting_pyscalars, NPY_FALSE); if (ufuncimpl == NULL) { goto fail; } @@ -6032,7 +6049,8 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, operands, signature, operand_DTypes, - force_legacy_promotion, allow_legacy_promotion, NPY_FALSE); + force_legacy_promotion, allow_legacy_promotion, + NPY_FALSE, NPY_FALSE); if (ufuncimpl == NULL) { goto fail; } diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 6edd00e65..94338e031 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -49,6 +49,7 @@ #endif #include <stdbool.h> +#include <arrayobject.h> static PyObject * npy_casting_to_py_object(NPY_CASTING casting) @@ -1929,14 +1930,21 @@ linear_search_type_resolver(PyUFuncObject *self, int types[NPY_MAXARGS]; const char *ufunc_name; int no_castable_output = 0; - int use_min_scalar; /* For making a better error message on coercion error */ char err_dst_typecode = '-', err_src_typecode = '-'; ufunc_name = ufunc_get_name_cstr(self); - use_min_scalar = should_use_min_scalar(nin, op, 0, NULL); + assert(npy_promotion_state != NPY_USE_WEAK_PROMOTION_AND_WARN); + /* Always "use" with new promotion in case of Python int/float/complex */ + int use_min_scalar; + if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION) { + use_min_scalar = should_use_min_scalar(nin, op, 0, NULL); + } + else { + use_min_scalar = should_use_min_scalar_weak_literals(nin, op); + } /* If the ufunc has userloops, search for them. 
*/ if (self->userloops) { @@ -2126,11 +2134,19 @@ type_tuple_type_resolver(PyUFuncObject *self, int nin = self->nin, nop = nin + self->nout; int specified_types[NPY_MAXARGS]; const char *ufunc_name; - int no_castable_output = 0, use_min_scalar; + int no_castable_output = 0; ufunc_name = ufunc_get_name_cstr(self); - use_min_scalar = should_use_min_scalar(nin, op, 0, NULL); + assert(npy_promotion_state != NPY_USE_WEAK_PROMOTION_AND_WARN); + /* Always "use" with new promotion in case of Python int/float/complex */ + int use_min_scalar; + if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION) { + use_min_scalar = should_use_min_scalar(nin, op, 0, NULL); + } + else { + use_min_scalar = should_use_min_scalar_weak_literals(nin, op); + } /* Fill in specified_types from the tuple or string */ const char *bad_type_tup_msg = ( diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py index 5c5ff55b4..16ecb1943 100644 --- a/numpy/core/tests/test_casting_unittests.py +++ b/numpy/core/tests/test_casting_unittests.py @@ -10,6 +10,7 @@ import pytest import textwrap import enum import random +import ctypes import numpy as np from numpy.lib.stride_tricks import as_strided @@ -786,7 +787,8 @@ class TestCasting: # None to <other> casts may succeed or fail, but a NULL'ed array must # behave the same as one filled with None's. arr_normal = np.array([None] * 5) - arr_NULLs = np.empty_like([None] * 5) + arr_NULLs = np.empty_like(arr_normal) + ctypes.memset(arr_NULLs.ctypes.data, 0, arr_NULLs.nbytes) # If the check fails (maybe it should) the test would lose its purpose: assert arr_NULLs.tobytes() == b"\x00" * arr_NULLs.nbytes diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index b37bded73..f95f95893 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -11,7 +11,7 @@ from numpy.core._rational_tests import rational from numpy.core._multiarray_tests import create_custom_field_dtype from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_raises, HAS_REFCOUNT, - IS_PYSTON) + IS_PYSTON, _OLD_PROMOTION) from numpy.compat import pickle from itertools import permutations import random @@ -1288,25 +1288,34 @@ class TestPromotion: """Test cases related to more complex DType promotions. 
Further promotion tests are defined in `test_numeric.py`
    """
-    @pytest.mark.parametrize(["other", "expected"],
-            [(2**16-1, np.complex64),
-             (2**32-1, np.complex128),
-             (np.float16(2), np.complex64),
-             (np.float32(2), np.complex64),
-             (np.longdouble(2), np.complex64),
+    @np._no_nep50_warning()
+    @pytest.mark.parametrize(["other", "expected", "expected_weak"],
+            [(2**16-1, np.complex64, None),
+             (2**32-1, np.complex128, np.complex64),
+             (np.float16(2), np.complex64, None),
+             (np.float32(2), np.complex64, None),
+             (np.longdouble(2), np.complex64, np.clongdouble),
              # Base of the double value to sidestep any rounding issues:
-             (np.longdouble(np.nextafter(1.7e308, 0.)), np.complex128),
+             (np.longdouble(np.nextafter(1.7e308, 0.)),
+              np.complex128, np.clongdouble),
              # Additionally use "nextafter" so the cast can't round down:
-             (np.longdouble(np.nextafter(1.7e308, np.inf)), np.clongdouble),
+             (np.longdouble(np.nextafter(1.7e308, np.inf)),
+              np.clongdouble, None),
              # repeat for complex scalars:
-             (np.complex64(2), np.complex64),
-             (np.clongdouble(2), np.complex64),
+             (np.complex64(2), np.complex64, None),
+             (np.clongdouble(2), np.complex64, np.clongdouble),
              # Base of the double value to sidestep any rounding issues:
-             (np.clongdouble(np.nextafter(1.7e308, 0.) * 1j), np.complex128),
+             (np.clongdouble(np.nextafter(1.7e308, 0.) * 1j),
+              np.complex128, np.clongdouble),
              # Additionally use "nextafter" so the cast can't round down:
-             (np.clongdouble(np.nextafter(1.7e308, np.inf)), np.clongdouble),
+             (np.clongdouble(np.nextafter(1.7e308, np.inf)),
+              np.clongdouble, None),
             ])
-    def test_complex_other_value_based(self, other, expected):
+    def test_complex_other_value_based(self,
+            weak_promotion, other, expected, expected_weak):
+        if weak_promotion and expected_weak is not None:
+            expected = expected_weak
+
         # This would change if we modify the value based promotion
         min_complex = np.dtype(np.complex64)

@@ -1339,7 +1348,7 @@ class TestPromotion:

     def test_complex_pyscalar_promote_rational(self):
         with pytest.raises(TypeError,
-                match=r".* do not have a common DType"):
+                match=r".* no common DType exists for the given inputs"):
             np.result_type(1j, rational)

         with pytest.raises(TypeError,
@@ -1358,13 +1367,21 @@ class TestPromotion:

     @pytest.mark.parametrize(["other", "expected"],
             [(1, rational), (1., np.float64)])
-    def test_float_int_pyscalar_promote_rational(self, other, expected):
+    @np._no_nep50_warning()
+    def test_float_int_pyscalar_promote_rational(
+            self, weak_promotion, other, expected):
         # Note that rationals are a bit awkward as they promote with float64
         # or default ints, but not float16 or uint8/int8 (which looks
-        # inconsistent here)
-        with pytest.raises(TypeError,
-                match=r".* do not have a common DType"):
-            np.result_type(other, rational)
+        # inconsistent here).  The new promotion fixes this (partially?)
+        if not weak_promotion and type(other) == float:
+            # The float version checks float16 in the legacy path, which
+            # fails; the integer version seems to check int8 as well, so
+            # it can pass.
+            with pytest.raises(TypeError,
+                    match=r".* do not have a common DType"):
+                np.result_type(other, rational)
+        else:
+            assert np.result_type(other, rational) == expected

        assert np.result_type(other, rational(1, 2)) == expected

diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py
index 0ef1b714b..ea96f0fef 100644
--- a/numpy/core/tests/test_einsum.py
+++ b/numpy/core/tests/test_einsum.py
@@ -239,6 +239,7 @@ class TestEinsum:
         assert_(b.base is a)
         assert_equal(b, a.swapaxes(0, 1))

+    @np._no_nep50_warning()
     def check_einsum_sums(self, dtype, do_opt=False):
         # Check various sums.  Does many sizes to exercise unrolled loops.

diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index 6743dfb51..4e9a3c9d6 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -3,7 +3,7 @@ import pytest

 import numpy as np
 from numpy import uint16, float16, float32, float64
-from numpy.testing import assert_, assert_equal
+from numpy.testing import assert_, assert_equal, _OLD_PROMOTION

 def assert_raises_fpe(strmatch, callable, *args, **kwargs):
@@ -85,6 +85,7 @@ class TestHalf:
     @pytest.mark.parametrize("offset", [None, "up", "down"])
     @pytest.mark.parametrize("shift", [None, "up", "down"])
     @pytest.mark.parametrize("float_t", [np.float32, np.float64])
+    @np._no_nep50_warning()
     def test_half_conversion_rounding(self, float_t, shift, offset):
         # Assumes that round to even is used during casting.
         max_pattern = np.float16(np.finfo(np.float16).max).view(np.uint16)
@@ -450,31 +451,35 @@ class TestHalf:
         assert_equal(np.frexp(b), ([-0.5, 0.625, 0.5, 0.5, 0.75], [2, 3, 1, 3, 2]))
         assert_equal(np.ldexp(b, [0, 1, 2, 4, 2]), [-2, 10, 4, 64, 12])

-    def test_half_coercion(self):
+    @np._no_nep50_warning()
+    def test_half_coercion(self, weak_promotion):
         """Test that half gets coerced properly with the other types"""
         a16 = np.array((1,), dtype=float16)
         a32 = np.array((1,), dtype=float32)
         b16 = float16(1)
         b32 = float32(1)

-        assert_equal(np.power(a16, 2).dtype, float16)
-        assert_equal(np.power(a16, 2.0).dtype, float16)
-        assert_equal(np.power(a16, b16).dtype, float16)
-        assert_equal(np.power(a16, b32).dtype, float16)
-        assert_equal(np.power(a16, a16).dtype, float16)
-        assert_equal(np.power(a16, a32).dtype, float32)
-
-        assert_equal(np.power(b16, 2).dtype, float64)
-        assert_equal(np.power(b16, 2.0).dtype, float64)
-        assert_equal(np.power(b16, b16).dtype, float16)
-        assert_equal(np.power(b16, b32).dtype, float32)
-        assert_equal(np.power(b16, a16).dtype, float16)
-        assert_equal(np.power(b16, a32).dtype, float32)
-
-        assert_equal(np.power(a32, a16).dtype, float32)
-        assert_equal(np.power(a32, b16).dtype, float32)
-        assert_equal(np.power(b32, a16).dtype, float16)
-        assert_equal(np.power(b32, b16).dtype, float32)
+        assert np.power(a16, 2).dtype == float16
+        assert np.power(a16, 2.0).dtype == float16
+        assert np.power(a16, b16).dtype == float16
+        expected_dt = float32 if weak_promotion else float16
+        assert np.power(a16, b32).dtype == expected_dt
+        assert np.power(a16, a16).dtype == float16
+        assert np.power(a16, a32).dtype == float32
+
+        expected_dt = float16 if weak_promotion else float64
+        assert np.power(b16, 2).dtype == expected_dt
+        assert np.power(b16, 2.0).dtype == expected_dt
+        assert np.power(b16, b16).dtype == float16
+        assert np.power(b16, b32).dtype == float32
+        assert np.power(b16, a16).dtype == float16
+        assert np.power(b16, a32).dtype == float32
+
+        assert np.power(a32, a16).dtype == float32
+        assert np.power(a32, b16).dtype == float32
+        expected_dt = float32 if weak_promotion else float16
+        assert np.power(b32, a16).dtype == expected_dt
+        assert np.power(b32, b16).dtype == float32

     @pytest.mark.skipif(platform.machine() == "armv5tel", reason="See gh-413.")
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 84fdf545f..9f6023c07 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -1172,6 +1172,21 @@ class TestCreation:
         a = np.array([1, Decimal(1)])
         a = np.array([[1], [Decimal(1)]])

+    @pytest.mark.parametrize("dtype", [object, "O,O", "O,(3)O", "(2,3)O"])
+    @pytest.mark.parametrize("function", [
+            np.ndarray, np.empty,
+            lambda shape, dtype: np.empty_like(np.empty(shape, dtype=dtype))])
+    def test_object_initialized_to_None(self, function, dtype):
+        # NumPy has support for object fields to be NULL (meaning None)
+        # but generally, we should always fill with the proper None, and
+        # downstream may rely on that.  (For fully initialized arrays!)
+        arr = function(3, dtype=dtype)
+        # We expect a fill value of None, which is not NULL:
+        expected = np.array(None).tobytes()
+        expected = expected * (arr.nbytes // len(expected))
+        assert arr.tobytes() == expected
+
+
 class TestStructured:
     def test_subarray_field_access(self):
         a = np.zeros((3, 5), dtype=[('a', ('i4', (2, 2)))])
@@ -2400,23 +2415,32 @@ class TestMethods:
             assert_raises(ValueError, d.sort, kind=k)
             assert_raises(ValueError, d.argsort, kind=k)

-    def test_searchsorted(self):
-        # test for floats and complex containing nans. The logic is the
-        # same for all float types so only test double types for now.
-        # The search sorted routines use the compare functions for the
-        # array type, so this checks if that is consistent with the sort
-        # order.
-
-        # check double
-        a = np.array([0, 1, np.nan])
-        msg = "Test real searchsorted with nans, side='l'"
+    @pytest.mark.parametrize('a', [
+        np.array([0, 1, np.nan], dtype=np.float16),
+        np.array([0, 1, np.nan], dtype=np.float32),
+        np.array([0, 1, np.nan]),
+        ])
+    def test_searchsorted_floats(self, a):
+        # test for float arrays containing nans.  Explicitly test
+        # half, single, and double precision floats to verify that
+        # the NaN-handling is correct.
+        msg = "Test real (%s) searchsorted with nans, side='l'" % a.dtype
         b = a.searchsorted(a, side='left')
         assert_equal(b, np.arange(3), msg)
-        msg = "Test real searchsorted with nans, side='r'"
+        msg = "Test real (%s) searchsorted with nans, side='r'" % a.dtype
         b = a.searchsorted(a, side='right')
         assert_equal(b, np.arange(1, 4), msg)
         # check keyword arguments
         a.searchsorted(v=1)
+        x = np.array([0, 1, np.nan], dtype='float32')
+        y = np.searchsorted(x, x[-1])
+        assert_equal(y, 2)
+
+    def test_searchsorted_complex(self):
+        # test for complex arrays containing nans.
+        # The search sorted routines use the compare functions for the
+        # array type, so this checks if that is consistent with the sort
+        # order.
        # check double complex
        a = np.zeros(9, dtype=np.complex128)
        a.real += [0, 0, 1, 1, 0, 1, np.nan, np.nan, np.nan]
@@ -2435,7 +2459,8 @@ class TestMethods:
         a = np.array([0, 128], dtype='>i4')
         b = a.searchsorted(np.array(128, dtype='>i4'))
         assert_equal(b, 1, msg)
-
+
+    def test_searchsorted_n_elements(self):
         # Check 0 elements
         a = np.ones(0)
         b = a.searchsorted([0, 1, 2], 'left')
@@ -2455,6 +2480,7 @@ class TestMethods:
         b = a.searchsorted([0, 1, 2], 'right')
         assert_equal(b, [0, 2, 2])

+    def test_searchsorted_unaligned_array(self):
         # Test searching unaligned array
         a = np.arange(10)
         aligned = np.empty(a.itemsize * a.size + 1, 'uint8')
@@ -2471,6 +2497,7 @@ class TestMethods:
         b = a.searchsorted(unaligned, 'right')
         assert_equal(b, a + 1)

+    def test_searchsorted_resetting(self):
         # Test smart resetting of binsearch indices
         a = np.arange(5)
         b = a.searchsorted([6, 5, 4], 'left')
@@ -2478,6 +2505,7 @@ class TestMethods:
         b = a.searchsorted([6, 5, 4], 'right')
         assert_equal(b, [5, 5, 5])

+    def test_searchsorted_type_specific(self):
         # Test all type specific binary search functions
         types = ''.join((np.typecodes['AllInteger'], np.typecodes['AllFloat'],
                          np.typecodes['Datetime'], '?O'))
@@ -5469,10 +5497,12 @@ class TestIO:

     @pytest.mark.slow # takes > 1 minute on mechanical hard drive
     def test_big_binary(self):
-        """Test workarounds for 32-bit limited fwrite, fseek, and ftell
-        calls in windows. These normally would hang doing something like this.
-        See http://projects.scipy.org/numpy/ticket/1660"""
-        if sys.platform != 'win32':
+        """Test workarounds for 32-bit limit for MSVC fwrite, fseek, and ftell
+
+        These normally would hang doing something like this.
+        See: https://github.com/numpy/numpy/issues/2256
+        """
+        if sys.platform != 'win32' or '[GCC ' in sys.version:
             return
         try:
             # before workarounds, only up to 2**32-1 worked
diff --git a/numpy/core/tests/test_nep50_promotions.py b/numpy/core/tests/test_nep50_promotions.py
new file mode 100644
index 000000000..5c59a16ea
--- /dev/null
+++ b/numpy/core/tests/test_nep50_promotions.py
@@ -0,0 +1,72 @@
+"""
+This file adds basic tests to test the NEP 50 style promotion compatibility
+mode.  Most of these tests are likely to be simply deleted again once NEP 50
+is adopted in the main test suite.  A few may be moved elsewhere.
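+
+As a rough illustration of the "weak_and_warn" mode exercised here (an
+editorial sketch, mirroring the first test below):
+
+    >>> np._set_promotion_state("weak_and_warn")
+    >>> res = np.array([1], np.uint8) + np.int64(1)  # UserWarning: result dtype changed
+    >>> res.dtype
+    dtype('int64')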
+""" + +import numpy as np +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def _weak_promotion_enabled(): + state = np._get_promotion_state() + np._set_promotion_state("weak_and_warn") + yield + np._set_promotion_state(state) + + +def test_nep50_examples(): + with pytest.warns(UserWarning, match="result dtype changed"): + res = np.uint8(1) + 2 + assert res.dtype == np.uint8 + + with pytest.warns(UserWarning, match="result dtype changed"): + res = np.array([1], np.uint8) + np.int64(1) + assert res.dtype == np.int64 + + with pytest.warns(UserWarning, match="result dtype changed"): + res = np.array([1], np.uint8) + np.array(1, dtype=np.int64) + assert res.dtype == np.int64 + + with pytest.warns(UserWarning, match="result dtype changed"): + # Note: Should warn (error with the errstate), but does not: + with np.errstate(over="raise"): + res = np.uint8(100) + 200 + assert res.dtype == np.uint8 + + with pytest.warns(Warning) as recwarn: + res = np.float32(1) + 3e100 + + # Check that both warnings were given in the one call: + warning = str(recwarn.pop(UserWarning).message) + assert warning.startswith("result dtype changed") + warning = str(recwarn.pop(RuntimeWarning).message) + assert warning.startswith("overflow") + assert len(recwarn) == 0 # no further warnings + assert np.isinf(res) + assert res.dtype == np.float32 + + # Changes, but we don't warn for it (too noisy) + res = np.array([0.1], np.float32) == np.float64(0.1) + assert res[0] == False + + # Additional test, since the above silences the warning: + with pytest.warns(UserWarning, match="result dtype changed"): + res = np.array([0.1], np.float32) + np.float64(0.1) + assert res.dtype == np.float64 + + with pytest.warns(UserWarning, match="result dtype changed"): + res = np.array([1.], np.float32) + np.int64(3) + assert res.dtype == np.float64 + + +@pytest.mark.xfail +def test_nep50_integer_conversion_errors(): + # Implementation for error paths is mostly missing (as of writing) + with pytest.raises(ValueError): # (or TypeError?) + np.array([1], np.uint8) + 300 + + with pytest.raises(ValueError): # (or TypeError?) 
+ np.uint8(1) + 300 + diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 5b15e29b4..21bf91a35 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -3383,6 +3383,14 @@ class TestCross: for axisc in range(-2, 2): assert_equal(np.cross(u, u, axisc=axisc).shape, (3, 4)) + def test_uint8_int32_mixed_dtypes(self): + # regression test for gh-19138 + u = np.array([[195, 8, 9]], np.uint8) + v = np.array([250, 166, 68], np.int32) + z = np.array([[950, 11010, -30370]], dtype=np.int32) + assert_equal(np.cross(v, u), z) + assert_equal(np.cross(u, v), -z) + def test_outer_out_param(): arr1 = np.ones((5,)) diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index 8b14284ff..7f00f6bf5 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -1003,6 +1003,7 @@ def test_longdouble_complex(): @pytest.mark.parametrize(["__op__", "__rop__", "op", "cmp"], ops_with_names) @pytest.mark.parametrize("subtype", [float, int, complex, np.float16]) +@np._no_nep50_warning() def test_pyscalar_subclasses(subtype, __op__, __rop__, op, cmp): def op_func(self, other): return __op__ diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 3466178a3..e4b1ceee3 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -2402,6 +2402,7 @@ def test_ufunc_types(ufunc): @pytest.mark.parametrize('ufunc', [getattr(np, x) for x in dir(np) if isinstance(getattr(np, x), np.ufunc)]) +@np._no_nep50_warning() def test_ufunc_noncontiguous(ufunc): ''' Check that contiguous and non-contiguous calls to ufuncs diff --git a/numpy/distutils/msvccompiler.py b/numpy/distutils/msvccompiler.py index 681a254b8..2b93221ba 100644 --- a/numpy/distutils/msvccompiler.py +++ b/numpy/distutils/msvccompiler.py @@ -56,3 +56,21 @@ class MSVCCompiler(_MSVCCompiler): if platform_bits == 32: self.compile_options += ['/arch:SSE2'] self.compile_options_debug += ['/arch:SSE2'] + + +def lib_opts_if_msvc(build_cmd): + """ Add flags if we are using MSVC compiler + + We can't see `build_cmd` in our scope, because we have not initialized + the distutils build command, so use this deferred calculation to run + when we are building the library. + """ + if build_cmd.compiler.compiler_type != 'msvc': + return [] + # Explicitly disable whole-program optimization. 
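+    # (With /GL, MSVC emits object files holding intermediate code that only
+    # its own linker understands; /GL- keeps them linkable by other toolchains.)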
+ flags = ['/GL-'] + # Disable voltbl section for vc142 to allow link using mingw-w64; see: + # https://github.com/matthew-brett/dll_investigation/issues/1#issuecomment-1100468171 + if build_cmd.compiler_opt.cc_test_flags(['-d2VolatileMetadata-']): + flags.append('-d2VolatileMetadata-') + return flags diff --git a/numpy/f2py/auxfuncs.py b/numpy/f2py/auxfuncs.py index 98c9eff78..3f9b0ceaf 100644 --- a/numpy/f2py/auxfuncs.py +++ b/numpy/f2py/auxfuncs.py @@ -47,7 +47,7 @@ __all__ = [ 'isunsigned_chararray', 'isunsigned_long_long', 'isunsigned_long_longarray', 'isunsigned_short', 'isunsigned_shortarray', 'l_and', 'l_not', 'l_or', 'outmess', - 'replace', 'show', 'stripcomma', 'throw_error', + 'replace', 'show', 'stripcomma', 'throw_error', 'isattr_value' ] @@ -298,6 +298,9 @@ def issubroutine_wrap(rout): return 0 return issubroutine(rout) and hasassumedshape(rout) +def isattr_value(var): + return 'value' in var.get('attrspec', []) + def hasassumedshape(rout): if rout.get('hasassumedshape'): @@ -692,7 +695,8 @@ def getcallprotoargument(rout, cb_map={}): elif isstring(var): pass else: - ctype = ctype + '*' + if not isattr_value(var): + ctype = ctype + '*' if ((isstring(var) or isarrayofstrings(var) # obsolete? or isstringarray(var))): diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py index 63c48a878..1bac87102 100755 --- a/numpy/f2py/rules.py +++ b/numpy/f2py/rules.py @@ -71,7 +71,7 @@ from .auxfuncs import ( islong_double, islong_doublefunction, islong_long, islong_longfunction, ismoduleroutine, isoptional, isrequired, isscalar, issigned_long_longarray, isstring, isstringarray, - isstringfunction, issubroutine, + isstringfunction, issubroutine, isattr_value, issubroutine_wrap, isthreadsafe, isunsigned, isunsigned_char, isunsigned_chararray, isunsigned_long_long, isunsigned_long_longarray, isunsigned_short, isunsigned_shortarray, @@ -874,7 +874,7 @@ if (#varname#_cb.capi==Py_None) { { # Common 'decl': ' #ctype# #varname# = 0;', 'pyobjfrom': {debugcapi: ' fprintf(stderr,"#vardebugshowvalue#\\n",#varname#);'}, - 'callfortran': {isintent_c: '#varname#,', l_not(isintent_c): '&#varname#,'}, + 'callfortran': {l_or(isintent_c, isattr_value): '#varname#,', l_not(l_or(isintent_c, isattr_value)): '&#varname#,'}, 'return': {isintent_out: ',#varname#'}, '_check': l_and(isscalar, l_not(iscomplex)) }, { diff --git a/numpy/f2py/tests/src/value_attrspec/gh21665.f90 b/numpy/f2py/tests/src/value_attrspec/gh21665.f90 new file mode 100644 index 000000000..7d9dc0fd4 --- /dev/null +++ b/numpy/f2py/tests/src/value_attrspec/gh21665.f90 @@ -0,0 +1,9 @@ +module fortfuncs + implicit none +contains + subroutine square(x,y) + integer, intent(in), value :: x + integer, intent(out) :: y + y = x*x + end subroutine square +end module fortfuncs diff --git a/numpy/f2py/tests/test_f2py2e.py b/numpy/f2py/tests/test_f2py2e.py index 9de043d73..2c10f046f 100644 --- a/numpy/f2py/tests/test_f2py2e.py +++ b/numpy/f2py/tests/test_f2py2e.py @@ -79,6 +79,17 @@ def retreal_f77(tmpdir_factory): fn.write_text(fdat, encoding="ascii") return fn +@pytest.fixture(scope="session") +def f2cmap_f90(tmpdir_factory): + """Generates a single f90 file for testing""" + fdat = util.getpath("tests", "src", "f2cmap", "isoFortranEnvMap.f90").read_text() + f2cmap = util.getpath("tests", "src", "f2cmap", ".f2py_f2cmap").read_text() + fn = tmpdir_factory.getbasetemp() / "f2cmap.f90" + fmap = tmpdir_factory.getbasetemp() / "mapfile" + fn.write_text(fdat, encoding="ascii") + fmap.write_text(f2cmap, encoding="ascii") + return fn + def test_gen_pyf(capfd, 
hello_world_f90, monkeypatch): """Ensures that a signature file is generated via the CLI @@ -105,6 +116,7 @@ def test_gen_pyf_stdout(capfd, hello_world_f90, monkeypatch): f2pycli() out, _ = capfd.readouterr() assert "Saving signatures to file" in out + assert "function hi() ! in " in out def test_gen_pyf_no_overwrite(capfd, hello_world_f90, monkeypatch): @@ -533,13 +545,22 @@ def test_hlink(): pass -def test_f2cmap(): +def test_f2cmap(capfd, f2cmap_f90, monkeypatch): """Check that Fortran-to-Python KIND specs can be passed CLI :: --f2cmap """ - # TODO: populate - pass + ipath = Path(f2cmap_f90) + monkeypatch.setattr(sys, "argv", f'f2py -m blah {ipath} --f2cmap mapfile'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "Reading f2cmap from 'mapfile' ..." in out + assert "Mapping \"real(kind=real32)\" to \"float\"" in out + assert "Mapping \"real(kind=real64)\" to \"double\"" in out + assert "Mapping \"integer(kind=int64)\" to \"long_long\"" in out + assert "Successfully applied user defined f2cmap changes" in out def test_quiet(capfd, hello_world_f90, monkeypatch): diff --git a/numpy/f2py/tests/test_value_attrspec.py b/numpy/f2py/tests/test_value_attrspec.py new file mode 100644 index 000000000..83aaf6c91 --- /dev/null +++ b/numpy/f2py/tests/test_value_attrspec.py @@ -0,0 +1,14 @@ +import os +import pytest + +from . import util + +class TestValueAttr(util.F2PyTest): + sources = [util.getpath("tests", "src", "value_attrspec", "gh21665.f90")] + + # gh-21665 + def test_long_long_map(self): + inp = 2 + out = self.module.fortfuncs.square(inp) + exp_out = 4 + assert out == exp_out diff --git a/numpy/lib/arraysetops.py b/numpy/lib/arraysetops.py index d42ab2675..cf5f47a82 100644 --- a/numpy/lib/arraysetops.py +++ b/numpy/lib/arraysetops.py @@ -516,12 +516,13 @@ def setxor1d(ar1, ar2, assume_unique=False): return aux[flag[1:] & flag[:-1]] -def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None): +def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *, + kind=None): return (ar1, ar2) @array_function_dispatch(_in1d_dispatcher) -def in1d(ar1, ar2, assume_unique=False, invert=False): +def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None): """ Test whether each element of a 1-D array is also present in a second array. @@ -544,6 +545,26 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): False where an element of `ar1` is in `ar2` and True otherwise). Default is False. ``np.in1d(a, b, invert=True)`` is equivalent to (but is faster than) ``np.invert(in1d(a, b))``. + kind : {None, 'sort', 'table'}, optional + The algorithm to use. This will not affect the final result, + but will affect the speed and memory use. The default, None, + will select automatically based on memory considerations. + + * If 'sort', will use a mergesort-based approach. This will have + a memory usage of roughly 6 times the sum of the sizes of + `ar1` and `ar2`, not accounting for size of dtypes. + * If 'table', will use a lookup table approach similar + to a counting sort. This is only available for boolean and + integer arrays. This will have a memory usage of the + size of `ar1` plus the max-min value of `ar2`. `assume_unique` + has no effect when the 'table' option is used. + * If None, will automatically choose 'table' if + the required memory allocation is less than or equal to + 6 times the sum of the sizes of `ar1` and `ar2`, + otherwise will use 'sort'. 
This is done to not use + a large amount of memory by default, even though + 'table' may be faster in most cases. If 'table' is chosen, + `assume_unique` will have no effect. .. versionadded:: 1.8.0 @@ -569,6 +590,13 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): ``asarray(ar2)`` is an object array rather than the expected array of contained values. + Using ``kind='table'`` tends to be faster than `kind='sort'` if the + following relationship is true: + ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, + but may use greater memory. The default value for `kind` will + be automatically selected based only on memory usage, so one may + manually set ``kind='table'`` if memory constraints can be relaxed. + .. versionadded:: 1.4.0 Examples @@ -594,6 +622,83 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): if ar2.dtype == object: ar2 = ar2.reshape(-1, 1) + if kind not in {None, 'sort', 'table'}: + raise ValueError( + f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.") + + # Can use the table method if all arrays are integers or boolean: + is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2)) + use_table_method = is_int_arrays and kind in {None, 'table'} + + if use_table_method: + if ar2.size == 0: + if invert: + return np.ones_like(ar1, dtype=bool) + else: + return np.zeros_like(ar1, dtype=bool) + + # Convert booleans to uint8 so we can use the fast integer algorithm + if ar1.dtype == bool: + ar1 = ar1.astype(np.uint8) + if ar2.dtype == bool: + ar2 = ar2.astype(np.uint8) + + ar2_min = np.min(ar2) + ar2_max = np.max(ar2) + + ar2_range = int(ar2_max) - int(ar2_min) + + # Constraints on whether we can actually use the table method: + range_safe_from_overflow = ar2_range < np.iinfo(ar2.dtype).max + below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size) + + # Optimal performance is for approximately + # log10(size) > (log10(range) - 2.27) / 0.927. + # However, here we set the requirement that by default + # the intermediate array can only be 6x + # the combined memory allocation of the original + # arrays. See discussion on + # https://github.com/numpy/numpy/pull/12065. + + if ( + range_safe_from_overflow and + (below_memory_constraint or kind == 'table') + ): + + if invert: + outgoing_array = np.ones_like(ar1, dtype=bool) + else: + outgoing_array = np.zeros_like(ar1, dtype=bool) + + # Make elements 1 where the integer exists in ar2 + if invert: + isin_helper_ar = np.ones(ar2_range + 1, dtype=bool) + isin_helper_ar[ar2 - ar2_min] = 0 + else: + isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool) + isin_helper_ar[ar2 - ar2_min] = 1 + + # Mask out elements we know won't work + basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min) + outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] - + ar2_min] + + return outgoing_array + elif kind == 'table': # not range_safe_from_overflow + raise RuntimeError( + "You have specified kind='table', " + "but the range of values in `ar2` exceeds the " + "maximum integer of the datatype. " + "Please set `kind` to None or 'sort'." + ) + elif kind == 'table': + raise ValueError( + "The 'table' method is only " + "supported for boolean or integer arrays. " + "Please select 'sort' or None for kind." 
+ ) + + # Check if one of the arrays may contain arbitrary objects contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject @@ -637,12 +742,14 @@ def in1d(ar1, ar2, assume_unique=False, invert=False): return ret[rev_idx] -def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None): +def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None, + *, kind=None): return (element, test_elements) @array_function_dispatch(_isin_dispatcher) -def isin(element, test_elements, assume_unique=False, invert=False): +def isin(element, test_elements, assume_unique=False, invert=False, *, + kind=None): """ Calculates ``element in test_elements``, broadcasting over `element` only. Returns a boolean array of the same shape as `element` that is True @@ -664,6 +771,27 @@ def isin(element, test_elements, assume_unique=False, invert=False): calculating `element not in test_elements`. Default is False. ``np.isin(a, b, invert=True)`` is equivalent to (but faster than) ``np.invert(np.isin(a, b))``. + kind : {None, 'sort', 'table'}, optional + The algorithm to use. This will not affect the final result, + but will affect the speed and memory use. The default, None, + will select automatically based on memory considerations. + + * If 'sort', will use a mergesort-based approach. This will have + a memory usage of roughly 6 times the sum of the sizes of + `ar1` and `ar2`, not accounting for size of dtypes. + * If 'table', will use a lookup table approach similar + to a counting sort. This is only available for boolean and + integer arrays. This will have a memory usage of the + size of `ar1` plus the max-min value of `ar2`. `assume_unique` + has no effect when the 'table' option is used. + * If None, will automatically choose 'table' if + the required memory allocation is less than or equal to + 6 times the sum of the sizes of `ar1` and `ar2`, + otherwise will use 'sort'. This is done to not use + a large amount of memory by default, even though + 'table' may be faster in most cases. If 'table' is chosen, + `assume_unique` will have no effect. + Returns ------- @@ -691,6 +819,13 @@ def isin(element, test_elements, assume_unique=False, invert=False): of the `array` constructor's way of handling non-sequence collections. Converting the set to a list usually gives the desired behavior. + Using ``kind='table'`` tends to be faster than `kind='sort'` if the + following relationship is true: + ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, + but may use greater memory. The default value for `kind` will + be automatically selected based only on memory usage, so one may + manually set ``kind='table'`` if memory constraints can be relaxed. + .. 
versionadded:: 1.13.0 Examples @@ -737,7 +872,7 @@ def isin(element, test_elements, assume_unique=False, invert=False): """ element = np.asarray(element) return in1d(element, test_elements, assume_unique=assume_unique, - invert=invert).reshape(element.shape) + invert=invert, kind=kind).reshape(element.shape) def _union1d_dispatcher(ar1, ar2): diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 843e1b85a..fb5dd6fdd 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -542,7 +542,7 @@ def average(a, axis=None, weights=None, returned=False, *, wgt = np.broadcast_to(wgt, (a.ndim-1)*(1,) + wgt.shape) wgt = wgt.swapaxes(-1, axis) - scl = wgt.sum(axis=axis, dtype=result_dtype) + scl = wgt.sum(axis=axis, dtype=result_dtype, **keepdims_kw) if np.any(scl == 0.0): raise ZeroDivisionError( "Weights sum to zero, can't be normalized") @@ -4000,7 +4000,7 @@ def percentile(a, With 'i' being the floor and 'g' the fractional part of the result. .. math:: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) + i + g = q * ( n - alpha - beta + 1 ) + alpha The different methods then work as follows @@ -4279,7 +4279,7 @@ def quantile(a, and alpha and beta are correction constants modifying i and j: .. math:: - i + g = (q - alpha) / ( n - alpha - beta + 1 ) + i + g = q * ( n - alpha - beta + 1 ) + alpha The different methods then work as follows @@ -5140,10 +5140,14 @@ def delete(arr, obj, axis=None): single_value = False _obj = obj obj = np.asarray(obj) + # `size == 0` to allow empty lists similar to indexing, but (as there) + # is really too generic: if obj.size == 0 and not isinstance(_obj, np.ndarray): obj = obj.astype(intp) - elif obj.size == 1 and not isinstance(_obj, bool): - obj = obj.astype(intp).reshape(()) + elif obj.size == 1 and obj.dtype.kind in "ui": + # For a size 1 integer array we can use the single-value path + # (most dtypes, except boolean, should just fail later). + obj = obj.item() single_value = True if single_value: diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 210c0ea94..471f85976 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1067,8 +1067,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, r""" Load data from a text file. - Each row in the text file must have the same number of values. - Parameters ---------- fname : file, str, pathlib.Path, list of str, generator @@ -1129,10 +1127,17 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, .. versionadded:: 1.14.0 max_rows : int, optional - Read `max_rows` lines of content after `skiprows` lines. The default - is to read all the lines. + Read `max_rows` rows of content after `skiprows` lines. The default is + to read all the rows. Note that empty rows containing no data such as + empty lines and comment lines are not counted towards `max_rows`, + while such lines are counted in `skiprows`. .. versionadded:: 1.16.0 + + .. versionchanged:: 1.23.0 + Lines containing no data, including comment lines (e.g., lines + starting with '#' or as specified via `comments`) are not counted + towards `max_rows`. quotechar : unicode character or None, optional The character used to denote the start and end of a quoted item. Occurrences of the delimiter or comment characters are ignored within @@ -1164,6 +1169,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, `genfromtxt` function provides more sophisticated handling of, e.g., lines with missing values. 
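The reworded `max_rows` behavior is easiest to see with a short, illustrative doctest (not part of the patch): comment and blank lines are skipped, not counted.

    >>> from io import StringIO
    >>> s = StringIO("# header\n1 2\n\n3 4\n5 6")
    >>> np.loadtxt(s, max_rows=2)
    array([[1., 2.],
           [3., 4.]])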
+    Each row in the input text file must have the same number of values to be
+    able to read all values.  If all rows do not have the same number of
+    values, a subset of up to n columns (where n is the least number of
+    values present in all rows) can be read by specifying the columns via
+    `usecols`.
+
     .. versionadded:: 1.10.0

     The strings produced by the Python float.hex method can be used as
@@ -1272,6 +1282,15 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     >>> np.loadtxt(s, dtype="U", delimiter=",", quotechar='"')
     array('Hello, my name is "Monty"!', dtype='<U26')

+    Read a subset of columns when all rows do not contain an equal number of
+    values:
+
+    >>> d = StringIO("1 2\n2 4\n3 9 12\n4 16 20")
+    >>> np.loadtxt(d, usecols=(0, 1))
+    array([[ 1.,  2.],
+           [ 2.,  4.],
+           [ 3.,  9.],
+           [ 4., 16.]])
+
     """

     if like is not None:
diff --git a/numpy/lib/tests/test_arraypad.py b/numpy/lib/tests/test_arraypad.py
index ca3c35335..64b6a2e18 100644
--- a/numpy/lib/tests/test_arraypad.py
+++ b/numpy/lib/tests/test_arraypad.py
@@ -474,8 +474,7 @@ class TestStatistic:

     @pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")
     @pytest.mark.filterwarnings(
-        "ignore:invalid value encountered in (divide|double_scalars):"
-        "RuntimeWarning"
+        "ignore:invalid value encountered in( scalar)? divide:RuntimeWarning"
     )
     @pytest.mark.parametrize("mode", ["mean", "median"])
     def test_zero_stat_length_valid(self, mode):
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py
index e64634b69..bb07e25a9 100644
--- a/numpy/lib/tests/test_arraysetops.py
+++ b/numpy/lib/tests/test_arraysetops.py
@@ -195,7 +195,8 @@ class TestSetOps:
             assert_equal(actual, expected)
             assert actual.dtype == expected.dtype

-    def test_isin(self):
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_isin(self, kind):
         # the tests for in1d cover most of isin's behavior
         # if in1d is removed, would need to change those tests to test
         # isin instead.
         def _isin_slow(a, b):
             return a in b
         isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})

         def assert_isin_equal(a, b):
-            x = isin(a, b)
+            x = isin(a, b, kind=kind)
             y = isin_slow(a, b)
             assert_array_equal(x, y)
@@ -231,12 +232,32 @@ class TestSetOps:
         assert_isin_equal(5, 6)

         # empty array-like:
-        x = []
-        assert_isin_equal(x, b)
-        assert_isin_equal(a, x)
-        assert_isin_equal(x, x)
-
-    def test_in1d(self):
+        if kind != "table":
+            # An empty list will become float64,
+            # which is invalid for kind="table"
+            x = []
+            assert_isin_equal(x, b)
+            assert_isin_equal(a, x)
+            assert_isin_equal(x, x)
+
+        # empty array with various types:
+        for dtype in [bool, np.int64, np.float64]:
+            if kind == "table" and dtype == np.float64:
+                continue
+
+            if dtype in {np.int64, np.float64}:
+                ar = np.array([10, 20, 30], dtype=dtype)
+            elif dtype in {bool}:
+                ar = np.array([True, False, False])
+
+            empty_array = np.array([], dtype=dtype)
+
+            assert_isin_equal(empty_array, ar)
+            assert_isin_equal(ar, empty_array)
+            assert_isin_equal(empty_array, empty_array)
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d(self, kind):
         # we use two different sizes for the b array here to test the
         # two different paths in in1d().
        for mult in (1, 10):
             # One check without np.array to make sure lists are handled correct
             a = [5, 7, 1, 2]
             b = [2, 4, 3, 1, 5] * mult
             ec = np.array([True, False, True, True])
-            c = in1d(a, b, assume_unique=True)
+            c = in1d(a, b, assume_unique=True, kind=kind)
             assert_array_equal(c, ec)

             a[0] = 8
             ec = np.array([False, False, True, True])
-            c = in1d(a, b, assume_unique=True)
+            c = in1d(a, b, assume_unique=True, kind=kind)
             assert_array_equal(c, ec)

             a[0], a[3] = 4, 8
             ec = np.array([True, False, True, False])
-            c = in1d(a, b, assume_unique=True)
+            c = in1d(a, b, assume_unique=True, kind=kind)
             assert_array_equal(c, ec)

             a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
             b = [2, 3, 4] * mult
             ec = [False, True, False, True, True, True, True, True, True,
                   False, True, False, False, False]
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)

             b = b + [5, 5, 4] * mult
             ec = [True, True, True, True, True, True, True, True, True, True,
                   True, False, True, True]
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)

             a = np.array([5, 7, 1, 2])
             b = np.array([2, 4, 3, 1, 5] * mult)
             ec = np.array([True, False, True, True])
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)

             a = np.array([5, 7, 1, 1, 2])
             b = np.array([2, 4, 3, 3, 1, 5] * mult)
             ec = np.array([True, False, True, True, True])
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)

             a = np.array([5, 5])
             b = np.array([2, 2] * mult)
             ec = np.array([False, False])
-            c = in1d(a, b)
+            c = in1d(a, b, kind=kind)
             assert_array_equal(c, ec)

         a = np.array([5])
         b = np.array([2])
         ec = np.array([False])
-        c = in1d(a, b)
+        c = in1d(a, b, kind=kind)
         assert_array_equal(c, ec)

-        assert_array_equal(in1d([], []), [])
+        if kind in {None, "sort"}:
+            assert_array_equal(in1d([], [], kind=kind), [])

     def test_in1d_char_array(self):
         a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
@@ -305,16 +327,29 @@ class TestSetOps:
         assert_array_equal(c, ec)

-    def test_in1d_invert(self):
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_invert(self, kind):
         "Test in1d's invert parameter"
         # We use two different sizes for the b array here to test the
         # two different paths in in1d().
         for mult in (1, 10):
             a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
             b = [2, 3, 4] * mult
-            assert_array_equal(np.invert(in1d(a, b)), in1d(a, b, invert=True))
-
-    def test_in1d_ravel(self):
+            assert_array_equal(np.invert(in1d(a, b, kind=kind)),
+                               in1d(a, b, invert=True, kind=kind))
+
+        # float:
+        if kind in {None, "sort"}:
+            for mult in (1, 10):
+                a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5],
+                             dtype=np.float32)
+                b = [2, 3, 4] * mult
+                b = np.array(b, dtype=np.float32)
+                assert_array_equal(np.invert(in1d(a, b, kind=kind)),
+                                   in1d(a, b, invert=True, kind=kind))
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_ravel(self, kind):
         # Test that in1d ravels its input arrays.  This is not documented
         # behavior, however.  The test is to ensure consistency.
        a = np.arange(6).reshape(2, 3)
@@ -322,10 +357,75 @@ class TestSetOps:
         long_b = np.arange(3, 63).reshape(30, 2)
         ec = np.array([False, False, False, True, True, True])

-        assert_array_equal(in1d(a, b, assume_unique=True), ec)
-        assert_array_equal(in1d(a, b, assume_unique=False), ec)
-        assert_array_equal(in1d(a, long_b, assume_unique=True), ec)
-        assert_array_equal(in1d(a, long_b, assume_unique=False), ec)
+        assert_array_equal(in1d(a, b, assume_unique=True, kind=kind),
+                           ec)
+        assert_array_equal(in1d(a, b, assume_unique=False,
+                                kind=kind),
+                           ec)
+        assert_array_equal(in1d(a, long_b, assume_unique=True,
+                                kind=kind),
+                           ec)
+        assert_array_equal(in1d(a, long_b, assume_unique=False,
+                                kind=kind),
+                           ec)
+
+    def test_in1d_hit_alternate_algorithm(self):
+        """Hit the standard isin code with integers"""
+        # Need extreme range to hit standard code
+        # This hits it without the use of kind='table'
+        a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
+        b = np.array([2, 3, 4, 1e9], dtype=np.int64)
+        expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
+        assert_array_equal(expected, in1d(a, b))
+        assert_array_equal(np.invert(expected), in1d(a, b, invert=True))
+
+        a = np.array([5, 7, 1, 2], dtype=np.int64)
+        b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64)
+        ec = np.array([True, False, True, True])
+        c = in1d(a, b, assume_unique=True)
+        assert_array_equal(c, ec)
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_boolean(self, kind):
+        """Test that in1d works for boolean input"""
+        a = np.array([True, False])
+        b = np.array([False, False, False])
+        expected = np.array([False, True])
+        assert_array_equal(expected,
+                           in1d(a, b, kind=kind))
+        assert_array_equal(np.invert(expected),
+                           in1d(a, b, invert=True, kind=kind))
+
+    @pytest.mark.parametrize("kind", [None, "sort"])
+    def test_in1d_timedelta(self, kind):
+        """Test that in1d works for timedelta input"""
+        rstate = np.random.RandomState(0)
+        a = rstate.randint(0, 100, size=10)
+        b = rstate.randint(0, 100, size=10)
+        truth = in1d(a, b)
+        a_timedelta = a.astype("timedelta64[s]")
+        b_timedelta = b.astype("timedelta64[s]")
+        assert_array_equal(truth, in1d(a_timedelta, b_timedelta, kind=kind))
+
+    def test_in1d_table_timedelta_fails(self):
+        a = np.array([0, 1, 2], dtype="timedelta64[s]")
+        b = a
+        # Make sure it raises a ValueError:
+        with pytest.raises(ValueError):
+            in1d(a, b, kind="table")
+
+    @pytest.mark.parametrize("kind", [None, "sort", "table"])
+    def test_in1d_mixed_boolean(self, kind):
+        """Test that in1d works as expected for bool/int input."""
+        for dtype in np.typecodes["AllInteger"]:
+            a = np.array([True, False, False], dtype=bool)
+            b = np.array([1, 1, 1, 1], dtype=dtype)
+            expected = np.array([True, False, False], dtype=bool)
+            assert_array_equal(in1d(a, b, kind=kind), expected)
+
+            a, b = b, a
+            expected = np.array([True, True, True, True], dtype=bool)
+            assert_array_equal(in1d(a, b, kind=kind), expected)

     def test_in1d_first_array_is_object(self):
         ar1 = [None]
@@ -391,6 +491,40 @@ class TestSetOps:
         result = np.in1d(ar1, ar2, invert=True)
         assert_array_equal(result, np.invert(expected))

+    def test_in1d_errors(self):
+        """Test that in1d raises expected errors."""
+
+        # Error 1: `kind` is not one of 'sort', 'table', or None.
+        ar1 = np.array([1, 2, 3, 4, 5])
+        ar2 = np.array([2, 4, 6, 8, 10])
+        assert_raises(ValueError, in1d, ar1, ar2, kind='quicksort')
+
+        # Error 2: `kind="table"` does not work for non-integral arrays.
+        obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
+        obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
+        assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='table')
+
+        for dtype in [np.int32, np.int64]:
+            ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
+            # The range of this array will overflow:
+            overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)
+
+            # Error 3: `kind="table"` will trigger a runtime error
+            # if an integer overflow is expected when computing the
+            # range of `ar2`.
+            assert_raises(
+                RuntimeError,
+                in1d, ar1, overflow_ar2, kind='table'
+            )
+
+            # Non-error: `kind=None` will *not* trigger a runtime error
+            # if there is an integer overflow; it will switch to
+            # the `sort` algorithm.
+            result = np.in1d(ar1, overflow_ar2, kind=None)
+            assert_array_equal(result, [True] + [False] * 4)
+            result = np.in1d(ar1, overflow_ar2, kind='sort')
+            assert_array_equal(result, [True] + [False] * 4)
+
     def test_union1d(self):
         a = np.array([5, 4, 7, 1, 2])
         b = np.array([2, 4, 3, 3, 2, 1, 5])
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index bdcbef91d..8457551ca 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -360,6 +360,18 @@ class TestAverage:
         assert_(np.average(y3, weights=w3).dtype == np.result_type(y3, w3))

+        # test weights with `keepdims=False` and `keepdims=True`
+        x = np.array([2, 3, 4]).reshape(3, 1)
+        w = np.array([4, 5, 6]).reshape(3, 1)
+
+        actual = np.average(x, weights=w, axis=1, keepdims=False)
+        desired = np.array([2., 3., 4.])
+        assert_array_equal(actual, desired)
+
+        actual = np.average(x, weights=w, axis=1, keepdims=True)
+        desired = np.array([[2.], [3.], [4.]])
+        assert_array_equal(actual, desired)
+
     def test_returned(self):
         y = np.array([[1, 2, 3], [4, 5, 6]])
@@ -913,18 +925,39 @@ class TestDelete:
         with pytest.raises(IndexError):
             np.delete([0, 1, 2], np.array([], dtype=float))

-    def test_single_item_array(self):
-        a_del = delete(self.a, 1)
-        a_del_arr = delete(self.a, np.array([1]))
-        a_del_lst = delete(self.a, [1])
-        a_del_obj = delete(self.a, np.array([1], dtype=object))
-        assert_equal(a_del, a_del_arr, a_del_lst, a_del_obj)
+    @pytest.mark.parametrize("indexer", [np.array([1]), [1]])
+    def test_single_item_array(self, indexer):
+        a_del_int = delete(self.a, 1)
+        a_del = delete(self.a, indexer)
+        assert_equal(a_del_int, a_del)
+
+        nd_a_del_int = delete(self.nd_a, 1, axis=1)
+        nd_a_del = delete(self.nd_a, np.array([1]), axis=1)
+        assert_equal(nd_a_del_int, nd_a_del)
+
+    def test_single_item_array_non_int(self):
+        # Special handling for integer arrays must not affect non-integer ones.
+        # If `False` was cast to `0` it would delete the element:
+        res = delete(np.ones(1), np.array([False]))
+        assert_array_equal(res, np.ones(1))
+
+        # Test the more complicated (with axis) case from gh-21840
+        x = np.ones((3, 1))
+        false_mask = np.array([False], dtype=bool)
+        true_mask = np.array([True], dtype=bool)
+
+        res = delete(x, false_mask, axis=-1)
+        assert_array_equal(res, x)
+        res = delete(x, true_mask, axis=-1)
+        assert_array_equal(res, x[:, :0])
+        with pytest.raises(IndexError):
+            delete(np.ones(2), np.array([0], dtype=object))
 
-        nd_a_del = delete(self.nd_a, 1, axis=1)
-        nd_a_del_arr = delete(self.nd_a, np.array([1]), axis=1)
-        nd_a_del_lst = delete(self.nd_a, [1], axis=1)
-        nd_a_del_obj = delete(self.nd_a, np.array([1], dtype=object), axis=1)
-        assert_equal(nd_a_del, nd_a_del_arr, nd_a_del_lst, nd_a_del_obj)
+        with pytest.raises(IndexError):
+            # timedeltas are sometimes "integral", but clearly not allowed:
+            delete(np.ones(2), np.array([0], dtype="m8[ns]"))
 
 
 class TestGradient:
@@ -2954,11 +2987,11 @@ class TestPercentile:
 
     H_F_TYPE_CODES = [(int_type, np.float64)
                       for int_type in np.typecodes["AllInteger"]
-                      ] + [(np.float16, np.float64),
-                           (np.float32, np.float64),
+                      ] + [(np.float16, np.float16),
+                           (np.float32, np.float32),
                            (np.float64, np.float64),
                            (np.longdouble, np.longdouble),
-                           (np.complex64, np.complex128),
+                           (np.complex64, np.complex64),
                            (np.complex128, np.complex128),
                            (np.clongdouble, np.clongdouble),
                            (np.dtype("O"), np.float64)]
@@ -2980,10 +3013,15 @@ class TestPercentile:
                               expected,
                               input_dtype,
                               expected_dtype):
+        expected_dtype = np.dtype(expected_dtype)
+        if np._get_promotion_state() == "legacy":
+            expected_dtype = np.promote_types(expected_dtype, np.float64)
+
         arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=input_dtype)
         actual = np.percentile(arr, 40.0, method=method)
-        np.testing.assert_almost_equal(actual, expected, 14)
+        np.testing.assert_almost_equal(
+            actual, expected_dtype.type(expected), 14)
 
         if method in ["inverted_cdf", "closest_observation"]:
             if input_dtype == "O":
diff --git a/numpy/linalg/lapack_lite/f2c.c b/numpy/linalg/lapack_lite/f2c.c
index f1d3fdfbe..869fce5d3 100644
--- a/numpy/linalg/lapack_lite/f2c.c
+++ b/numpy/linalg/lapack_lite/f2c.c
@@ -377,10 +377,11 @@
 p->i = p1.i;
 #endif
 
 /* NO_OVERWRITE */
 
- int
 #ifdef KR_headers
+int
 s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll;
 #else
+int
 s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll)
 #endif
 {
@@ -429,7 +430,8 @@ s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll)
 	free(lp1);
 	}
 #endif
- }
+ return 0;
+}
 
 /* compare two strings */
diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
index ebbd92539..f871a5f8e 100644
--- a/numpy/linalg/tests/test_linalg.py
+++ b/numpy/linalg/tests/test_linalg.py
@@ -1803,9 +1803,9 @@ class TestCholesky:
             c = np.linalg.cholesky(a)
             b = np.matmul(c, c.transpose(t).conj())
-            assert_allclose(b, a,
-                            err_msg=f'{shape} {dtype}\n{a}\n{c}',
-                            atol=500 * a.shape[0] * np.finfo(dtype).eps)
+            with np._no_nep50_warning():
+                atol = 500 * a.shape[0] * np.finfo(dtype).eps
+            assert_allclose(b, a, atol=atol, err_msg=f'{shape} {dtype}\n{a}\n{c}')
 
     def test_0_size(self):
         class ArraySubclass(np.ndarray):
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index d8fd4f389..93eb74be3 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -4102,6 +4102,10 @@ class MaskedArray(ndarray):
         odata = getdata(other)
 
         if mask.dtype.names is not None:
+            # only == and != are reasonably defined for structured dtypes,
+            # so give up early for all other comparisons:
+            if compare not in (operator.eq, operator.ne):
+                return NotImplemented
             # For possibly masked structured arrays we need to be careful,
             # since the standard structured array comparison will use all
             # fields, masked or not. To avoid masked fields influencing the
@@ -4124,10 +4128,11 @@ class MaskedArray(ndarray):
             if isinstance(check, (np.bool_, bool)):
                 return masked if mask else check
 
-        if mask is not nomask:
+        if mask is not nomask and compare in (operator.eq, operator.ne):
             # Adjust elements that were masked, which should be treated
             # as equal if masked in both, unequal if masked in one.
             # Note that this works automatically for structured arrays too.
+            # Ignore this for operations other than `==` and `!=`.
             check = np.where(mask, compare(smask, omask), check)
 
         if mask.shape != check.shape:
             # Guarantee consistency of the shape, making a copy since the
@@ -4175,6 +4180,19 @@ class MaskedArray(ndarray):
         """
         return self._comparison(other, operator.ne)
 
+    # All other comparisons:
+    def __le__(self, other):
+        return self._comparison(other, operator.le)
+
+    def __lt__(self, other):
+        return self._comparison(other, operator.lt)
+
+    def __ge__(self, other):
+        return self._comparison(other, operator.ge)
+
+    def __gt__(self, other):
+        return self._comparison(other, operator.gt)
+
     def __add__(self, other):
         """
         Add self to other, and return a new masked array.
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index d90831b9b..d2986012b 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -645,7 +645,7 @@ def average(a, axis=None, weights=None, returned=False, *,
             wgt = wgt*(~a.mask)
             wgt.mask |= a.mask
 
-        scl = wgt.sum(axis=axis, dtype=result_dtype)
+        scl = wgt.sum(axis=axis, dtype=result_dtype, **keepdims_kw)
         avg = np.multiply(a, wgt,
                           dtype=result_dtype).sum(axis, **keepdims_kw) / scl
 
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 4fac897de..b056d5169 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -1756,6 +1756,52 @@ class TestMaskedArrayArithmetic:
         assert_equal(test.mask, [True, False])
         assert_(test.fill_value == True)
 
+    @pytest.mark.parametrize('dt1', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('dt2', num_dts, ids=num_ids)
+    @pytest.mark.parametrize('fill', [None, 1])
+    @pytest.mark.parametrize('op',
+            [operator.le, operator.lt, operator.ge, operator.gt])
+    def test_comparisons_for_numeric(self, op, dt1, dt2, fill):
+        # Test the ordering comparisons (<=, <, >=, >) for numeric arrays
+        a = array([0, 1], dtype=dt1, mask=[0, 1], fill_value=fill)
+
+        test = op(a, a)
+        assert_equal(test.data, op(a._data, a._data))
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        test = op(a, a[0])
+        assert_equal(test.data, op(a._data, a._data[0]))
+        assert_equal(test.mask, [False, True])
+        assert_(test.fill_value == True)
+
+        b = array([0, 1], dtype=dt2, mask=[1, 0], fill_value=fill)
+        test = op(a, b)
+        assert_equal(test.data, op(a._data, b._data))
+        assert_equal(test.mask, [True, True])
+        assert_(test.fill_value == True)
+
+        test = op(a[0], b)
+        assert_equal(test.data, op(a._data[0], b._data))
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+        test = op(b, a[0])
+        assert_equal(test.data, op(b._data, a._data[0]))
+        assert_equal(test.mask, [True, False])
+        assert_(test.fill_value == True)
+
+    @pytest.mark.parametrize('op',
+            [operator.le, operator.lt, operator.ge, operator.gt])
+    @pytest.mark.parametrize('fill', [None, "N/A"])
+    def test_comparisons_strings(self, op, fill):
+        # See gh-21770: mask propagation is broken for strings (and some
+        # other cases), so we explicitly test strings here.
+        # In principle only == and != may need special handling...
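+        # Roughly, the expectation checked below is that ordering operators
+        # should defer to the raw string data, i.e.
+        #     op(ma1, ma2)._data == op(ma1._data, ma2._data)
+        # with the mask handled separately (an illustrative restatement of
+        # the assert that follows, not extra test logic).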
+        ma1 = masked_array(["a", "b", "cde"], mask=[0, 1, 0], fill_value=fill)
+        ma2 = masked_array(["cde", "b", "a"], mask=[0, 1, 0], fill_value=fill)
+        assert_equal(op(ma1, ma2)._data, op(ma1._data, ma2._data))
 
     def test_eq_with_None(self):
         # Really, comparisons with None should not be done, but check them
         # anyway. Note that pep8 will flag these tests.
diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py
index 1827edd1f..04bf8cfc2 100644
--- a/numpy/ma/tests/test_extras.py
+++ b/numpy/ma/tests/test_extras.py
@@ -241,6 +241,15 @@ class TestAverage:
         a2dma = average(a2dm, axis=1)
         assert_equal(a2dma, [1.5, 4.0])
 
+    def test_testAverage4(self):
+        # Test that `keepdims` works with average
+        x = np.array([2, 3, 4]).reshape(3, 1)
+        b = np.ma.array(x, mask=[[False], [False], [True]])
+        w = np.array([4, 5, 6]).reshape(3, 1)
+        actual = average(b, weights=w, axis=1, keepdims=True)
+        desired = masked_array([[2.], [3.], [4.]], [[False], [False], [True]])
+        assert_equal(actual, desired)
+
     def test_onintegers_with_mask(self):
         # Test average on integers with mask
         a = average(array([1, 2]))
diff --git a/numpy/matlib.py b/numpy/matlib.py
index bd6b63289..e929fd9b1 100644
--- a/numpy/matlib.py
+++ b/numpy/matlib.py
@@ -300,9 +300,10 @@ def randn(*args):
 
     Notes
     -----
-    For random samples from :math:`N(\\mu, \\sigma^2)`, use:
+    For random samples from the normal distribution with mean ``mu`` and
+    standard deviation ``sigma``, use::
 
-    ``sigma * np.matlib.randn(...) + mu``
+        sigma * np.matlib.randn(...) + mu
 
     Examples
     --------
     >>> np.matlib.randn(1)
     matrix([[-0.09542833]])                 # random
     >>> np.matlib.randn(1, 2, 3)
     matrix([[ 0.99734545,  0.2829785 , -1.50629471],
             [-0.57860025,  1.65143654, -2.42667924]])
 
-    Two-by-four matrix of samples from :math:`N(3, 6.25)`:
+    Two-by-four matrix of samples from the normal distribution with
+    mean 3 and standard deviation 2.5:
 
     >>> 2.5 * np.matlib.randn((2, 4)) + 3
     matrix([[1.92771843, 6.16484065, 0.83314899, 1.30278462],
diff --git a/numpy/polynomial/polynomial.py b/numpy/polynomial/polynomial.py
index 8e2c6f002..d102f5a30 100644
--- a/numpy/polynomial/polynomial.py
+++ b/numpy/polynomial/polynomial.py
@@ -1339,7 +1339,7 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None):
     >>> np.random.seed(123)
     >>> from numpy.polynomial import polynomial as P
     >>> x = np.linspace(-1,1,51)  # x "data": [-1, -0.96, ..., 0.96, 1]
-    >>> y = x**3 - x + np.random.randn(len(x))  # x^3 - x + N(0,1) "noise"
+    >>> y = x**3 - x + np.random.randn(len(x))  # x^3 - x + Gaussian noise
     >>> c, stats = P.polyfit(x,y,3,full=True)
     >>> np.random.seed(123)
     >>> c  # c[0], c[2] should be approx. 0, c[1] approx. -1, c[3] approx. 1
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx
index 0019c4bcd..5153c3827 100644
--- a/numpy/random/_generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -1001,7 +1001,8 @@ cdef class Generator:
 
         Notes
         -----
-        For random samples from :math:`N(\\mu, \\sigma^2)`, use one of::
+        For random samples from the normal distribution with mean ``mu`` and
+        standard deviation ``sigma``, use one of::
 
             mu + sigma * rng.standard_normal(size=...)
             rng.normal(mu, sigma, size=...)
@@ -1022,7 +1023,8 @@ cdef class Generator:
         >>> s.shape
         (3, 4, 2)
 
-        Two-by-four array of samples from :math:`N(3, 6.25)`:
+        Two-by-four array of samples from the normal distribution with
+        mean 3 and standard deviation 2.5:
 
         >>> 3 + 2.5 * rng.standard_normal(size=(2, 4))
         array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],   # random
@@ -1126,7 +1128,8 @@ cdef class Generator:
         ...          linewidth=2, color='r')
         >>> plt.show()
 
-        Two-by-four array of samples from N(3, 6.25):
+        Two-by-four array of samples from the normal distribution with
+        mean 3 and standard deviation 2.5:
 
         >>> np.random.default_rng().normal(3, 2.5, size=(2, 4))
         array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],   # random
@@ -4769,6 +4772,7 @@ cdef class Generator:
         return arr[tuple(slices)]
 
 
+@cython.embedsignature(True)
 def default_rng(seed=None):
     """Construct a new Generator with the default BitGenerator (PCG64).
 
diff --git a/numpy/random/bit_generator.pyx b/numpy/random/bit_generator.pyx
index 2c50dbf70..62eaab6c0 100644
--- a/numpy/random/bit_generator.pyx
+++ b/numpy/random/bit_generator.pyx
@@ -486,7 +486,7 @@ cdef class BitGenerator():
         A seed to initialize the `BitGenerator`. If None, then fresh,
         unpredictable entropy will be pulled from the OS. If an ``int`` or
         ``array_like[ints]`` is passed, then it will be passed to
-        ~`numpy.random.SeedSequence` to derive the initial `BitGenerator` state.
+        `~numpy.random.SeedSequence` to derive the initial `BitGenerator` state.
         One may also pass in a `SeedSequence` instance.
 
     Attributes
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index 408d5a332..19d23f6a8 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -1224,16 +1224,18 @@ cdef class RandomState:
 
         Notes
         -----
-        For random samples from :math:`N(\\mu, \\sigma^2)`, use:
+        For random samples from the normal distribution with mean ``mu`` and
+        standard deviation ``sigma``, use::
 
-        ``sigma * np.random.randn(...) + mu``
+            sigma * np.random.randn(...) + mu
 
         Examples
         --------
        >>> np.random.randn()
         2.1923875335537315  # random
 
-        Two-by-four array of samples from N(3, 6.25):
+        Two-by-four array of samples from the normal distribution with
+        mean 3 and standard deviation 2.5:
 
         >>> 3 + 2.5 * np.random.randn(2, 4)
         array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],   # random
@@ -1373,7 +1375,8 @@ cdef class RandomState:
 
         Notes
         -----
-        For random samples from :math:`N(\\mu, \\sigma^2)`, use one of::
+        For random samples from the normal distribution with mean ``mu`` and
+        standard deviation ``sigma``, use one of::
 
             mu + sigma * np.random.standard_normal(size=...)
             np.random.normal(mu, sigma, size=...)
@@ -1393,7 +1396,8 @@
         >>> s.shape
         (3, 4, 2)
 
-        Two-by-four array of samples from :math:`N(3, 6.25)`:
+        Two-by-four array of samples from the normal distribution with
+        mean 3 and standard deviation 2.5:
 
         >>> 3 + 2.5 * np.random.standard_normal(size=(2, 4))
         array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],   # random
@@ -1500,7 +1504,8 @@ cdef class RandomState:
         ...          linewidth=2, color='r')
         >>> plt.show()
 
-        Two-by-four array of samples from N(3, 6.25):
+        Two-by-four array of samples from the normal distribution with
+        mean 3 and standard deviation 2.5:
 
         >>> np.random.normal(3, 2.5, size=(2, 4))
         array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],   # random
diff --git a/numpy/random/setup.py b/numpy/random/setup.py
index 233344430..cd9ad976c 100644
--- a/numpy/random/setup.py
+++ b/numpy/random/setup.py
@@ -1,12 +1,9 @@
 import os
-import platform
 import sys
 from os.path import join
 
 from numpy.distutils.system_info import platform_bits
-
-is_msvc = (platform.platform().startswith('Windows') and
-           platform.python_compiler().startswith('MS'))
+from numpy.distutils.msvccompiler import lib_opts_if_msvc
 
 
 def configuration(parent_package='', top_path=None):
@@ -43,13 +40,6 @@ def configuration(parent_package='', top_path=None):
     # Some bit generators exclude GCC inlining
     EXTRA_COMPILE_ARGS = ['-U__GNUC_GNU_INLINE__']
 
-    if is_msvc and platform_bits == 32:
-        # 32-bit windows requires explicit sse2 option
-        EXTRA_COMPILE_ARGS += ['/arch:SSE2']
-    elif not is_msvc:
-        # Some bit generators require c99
-        EXTRA_COMPILE_ARGS += ['-std=c99']
-
     if sys.platform == 'cygwin':
         # Export symbols without __declspec(dllexport) for using by cython.
         # Using __declspec(dllexport) does not export other necessary symbols
@@ -73,25 +63,25 @@ def configuration(parent_package='', top_path=None):
         'src/distributions/random_hypergeometric.c',
     ]
 
-    def gl_if_msvc(build_cmd):
-        """ Add flag if we are using MSVC compiler
+    def lib_opts(build_cmd):
+        """ Add flags that depend on the compiler.
 
-        We can't see this in our scope, because we have not initialized the
-        distutils build command, so use this deferred calculation to run when
-        we are building the library.
+        We can't see which compiler we are using in our scope, because we have
+        not initialized the distutils build command, so use this deferred
+        calculation to run when we are building the library.
         """
-        # Keep in sync with numpy/core/setup.py
-        if build_cmd.compiler.compiler_type == 'msvc':
-            # explicitly disable whole-program optimization
-            return ['/GL-']
-        return []
+        opts = lib_opts_if_msvc(build_cmd)
+        if build_cmd.compiler.compiler_type != 'msvc':
+            # Some bit generators require c99
+            opts.append('-std=c99')
+        return opts
 
     config.add_installed_library('npyrandom',
                                  sources=npyrandom_sources,
                                  install_dir='lib',
                                  build_info={
                                      'include_dirs' : [],  # empty list required for creating npyrandom.h
-                                     'extra_compiler_args': [gl_if_msvc],
+                                     'extra_compiler_args': [lib_opts],
                                  })
 
     for gen in ['mt19937']:
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
index e4f8b9892..c553658cb 100644
--- a/numpy/testing/_private/utils.py
+++ b/numpy/testing/_private/utils.py
@@ -36,6 +36,7 @@ __all__ = [
         'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY',
         'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare',
         'assert_no_gc_cycles', 'break_cycles', 'HAS_LAPACK64', 'IS_PYSTON',
+        '_OLD_PROMOTION'
        ]
 
 
@@ -52,6 +53,8 @@
 IS_PYSTON = hasattr(sys, "pyston_version_info")
 HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON
 HAS_LAPACK64 = numpy.linalg.lapack_lite._ilp64
 
+_OLD_PROMOTION = lambda: np._get_promotion_state() == 'legacy'
+
 
 def import_nose():
     """ Import nose only when needed.
@@ -473,6 +476,7 @@ def print_assert_equal(test_string, actual, desired):
         raise AssertionError(msg.getvalue())
 
 
+@np._no_nep50_warning()
 def assert_almost_equal(actual,desired,decimal=7,err_msg='',verbose=True):
     """
     Raises an AssertionError if two items are not equal up to desired
@@ -485,7 +489,7 @@ def assert_almost_equal(actual,desired,decimal=7,err_msg='',verbose=True):
     The test verifies that the elements of `actual` and `desired` satisfy.
 
-       ``abs(desired-actual) < 1.5 * 10**(-decimal)``
+       ``abs(desired-actual) < float64(1.5 * 10**(-decimal))``
 
     That is a looser test than originally documented, but agrees with what the
     actual implementation in `assert_array_almost_equal` did up to rounding
@@ -595,10 +599,11 @@ def assert_almost_equal(actual,desired,decimal=7,err_msg='',verbose=True):
             return
     except (NotImplementedError, TypeError):
         pass
-    if abs(desired - actual) >= 1.5 * 10.0**(-decimal):
+    if abs(desired - actual) >= np.float64(1.5 * 10.0**(-decimal)):
         raise AssertionError(_build_err_msg())
 
 
+@np._no_nep50_warning()
 def assert_approx_equal(actual,desired,significant=7,err_msg='',verbose=True):
     """
     Raises an AssertionError if two items are not equal up to significant
@@ -698,8 +703,10 @@ def assert_approx_equal(actual,desired,significant=7,err_msg='',verbose=True):
         raise AssertionError(msg)
 
 
+@np._no_nep50_warning()
 def assert_array_compare(comparison, x, y, err_msg='', verbose=True, header='',
-                         precision=6, equal_nan=True, equal_inf=True):
+                         precision=6, equal_nan=True, equal_inf=True,
+                         *, strict=False):
     __tracebackhide__ = True  # Hide traceback for py.test
     from numpy.core import array, array2string, isnan, inf, bool_, errstate, all, max, object_
 
@@ -753,11 +760,18 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True, header='',
                 return y_id
 
     try:
-        cond = (x.shape == () or y.shape == ()) or x.shape == y.shape
+        if strict:
+            cond = x.shape == y.shape and x.dtype == y.dtype
+        else:
+            cond = (x.shape == () or y.shape == ()) or x.shape == y.shape
         if not cond:
+            if x.shape != y.shape:
+                reason = f'\n(shapes {x.shape}, {y.shape} mismatch)'
+            else:
+                reason = f'\n(dtypes {x.dtype}, {y.dtype} mismatch)'
             msg = build_err_msg([x, y],
                                 err_msg
-                                + f'\n(shapes {x.shape}, {y.shape} mismatch)',
+                                + reason,
                                 verbose=verbose, header=header,
                                 names=('x', 'y'), precision=precision)
             raise AssertionError(msg)
@@ -814,6 +828,9 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True, header='',
                 # ignore errors for non-numeric types
                 with contextlib.suppress(TypeError):
                     error = abs(x - y)
+                    if np.issubdtype(x.dtype, np.unsignedinteger):
+                        # unsigned subtraction wraps around, so compute the
+                        # difference both ways and keep the smaller one
+                        error2 = abs(y - x)
+                        np.minimum(error, error2, out=error)
                     max_abs_error = max(error)
                     if getattr(error, 'dtype', object_) == object_:
                         remarks.append('Max absolute difference: '
@@ -852,7 +869,7 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True, header='',
         raise ValueError(msg)
 
 
-def assert_array_equal(x, y, err_msg='', verbose=True):
+def assert_array_equal(x, y, err_msg='', verbose=True, *, strict=False):
     """
     Raises an AssertionError if two array_like objects are not equal.
 
@@ -876,6 +893,10 @@ def assert_array_equal(x, y, err_msg='', verbose=True):
         The error message to be printed in case of failure.
     verbose : bool, optional
         If True, the conflicting values are appended to the error message.
+    strict : bool, optional
+        If True, raise an AssertionError when either the shape or the data
+        type of the array_like objects does not match. The special
+        handling for scalars mentioned in the Notes section is disabled.
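+        Both checks are demonstrated in the Examples section below.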
 
     Raises
     ------
@@ -892,7 +913,7 @@
     -----
     When one of `x` and `y` is a scalar and the other is array_like, the
     function checks that each element of the array_like object is equal to
-    the scalar.
+    the scalar. This behaviour can be disabled with the `strict` parameter.
 
     Examples
     --------
@@ -929,12 +950,41 @@
     >>> x = np.full((2, 5), fill_value=3)
     >>> np.testing.assert_array_equal(x, 3)
 
+    Use `strict` to raise an AssertionError when comparing a scalar with an
+    array:
+
+    >>> np.testing.assert_array_equal(x, 3, strict=True)
+    Traceback (most recent call last):
+        ...
+    AssertionError:
+    Arrays are not equal
+    <BLANKLINE>
+    (shapes (2, 5), () mismatch)
+     x: array([[3, 3, 3, 3, 3],
+           [3, 3, 3, 3, 3]])
+     y: array(3)
+
+    The `strict` parameter also ensures that the array data types match:
+
+    >>> x = np.array([2, 2, 2])
+    >>> y = np.array([2., 2., 2.], dtype=np.float32)
+    >>> np.testing.assert_array_equal(x, y, strict=True)
+    Traceback (most recent call last):
+        ...
+    AssertionError:
+    Arrays are not equal
+    <BLANKLINE>
+    (dtypes int64, float32 mismatch)
+     x: array([2, 2, 2])
+     y: array([2., 2., 2.], dtype=float32)
     """
     __tracebackhide__ = True  # Hide traceback for py.test
     assert_array_compare(operator.__eq__, x, y, err_msg=err_msg,
-                         verbose=verbose, header='Arrays are not equal')
+                         verbose=verbose, header='Arrays are not equal',
+                         strict=strict)
 
 
+@np._no_nep50_warning()
 def assert_array_almost_equal(x, y, decimal=6, err_msg='', verbose=True):
     """
     Raises an AssertionError if two objects are not equal up to desired
diff --git a/numpy/testing/_private/utils.pyi b/numpy/testing/_private/utils.pyi
index 0be13b729..6e051e914 100644
--- a/numpy/testing/_private/utils.pyi
+++ b/numpy/testing/_private/utils.pyi
@@ -200,6 +200,8 @@ def assert_array_compare(
     precision: SupportsIndex = ...,
     equal_nan: bool = ...,
     equal_inf: bool = ...,
+    *,
+    strict: bool = ...
 ) -> None: ...
 
 def assert_array_equal(
@@ -207,6 +209,8 @@ def assert_array_equal(
     y: ArrayLike,
     err_msg: str = ...,
     verbose: bool = ...,
+    *,
+    strict: bool = ...
 ) -> None: ...
 
 def assert_array_almost_equal(
diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py
index 49eeecc8e..377f570bd 100644
--- a/numpy/testing/tests/test_utils.py
+++ b/numpy/testing/tests/test_utils.py
@@ -214,6 +214,43 @@ class TestArrayEqual(_GenericTest):
                 np.array([1, 2, 3], np.float32),
                 np.array([1, 1e-40, 3], np.float32))
 
+    def test_array_vs_scalar_is_equal(self):
+        """Test comparing an array with a scalar when all values are equal."""
+        a = np.array([1., 1., 1.])
+        b = 1.
+
+        self._test_equal(a, b)
+
+    def test_array_vs_scalar_not_equal(self):
+        """Test comparing an array with a scalar when not all values equal."""
+        a = np.array([1., 2., 3.])
+        b = 1.
+
+        self._test_not_equal(a, b)
+
+    def test_array_vs_scalar_strict(self):
+        """Test comparing an array with a scalar with strict option."""
+        a = np.array([1., 1., 1.])
+        b = 1.
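+
+        # All values compare equal, so only the strict shape check,
+        # (3,) vs. (), can make the following assertion fail: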
+        with pytest.raises(AssertionError):
+            assert_array_equal(a, b, strict=True)
+
+    def test_array_vs_array_strict(self):
+        """Test comparing two arrays with strict option."""
+        a = np.array([1., 1., 1.])
+        b = np.array([1., 1., 1.])
+
+        assert_array_equal(a, b, strict=True)
+
+    def test_array_vs_float_array_strict(self):
+        """Test comparing an int array with a float array with strict option."""
+        a = np.array([1, 1, 1])
+        b = np.array([1., 1., 1.])
+
+        with pytest.raises(AssertionError):
+            assert_array_equal(a, b, strict=True)
+
 
 class TestBuildErrorMessage:
 
@@ -916,6 +953,20 @@ class TestAssertAllclose:
         a = np.array([[1, 2, 3, "NaT"]], dtype="m8[ns]")
         assert_allclose(a, a)
 
+    def test_error_message_unsigned(self):
+        """Check that the message is formatted correctly when overflow
+        can occur (gh-21768)."""
+        # Test for potential overflow in both directions:
+        # x - y
+        # and
+        # y - x
+        x = np.asarray([0, 1, 8], dtype='uint8')
+        y = np.asarray([4, 4, 4], dtype='uint8')
+        with pytest.raises(AssertionError) as exc_info:
+            assert_allclose(x, y, atol=3)
+        msgs = str(exc_info.value).split('\n')
+        assert_equal(msgs[4], 'Max absolute difference: 4')
+
 
 class TestArrayAlmostEqualNulp:
 
diff --git a/numpy/typing/__init__.py b/numpy/typing/__init__.py
index 840b9ca72..5cf02fe86 100644
--- a/numpy/typing/__init__.py
+++ b/numpy/typing/__init__.py
@@ -5,7 +5,7 @@ Typing (:mod:`numpy.typing`)
 
 .. versionadded:: 1.20
 
-Large parts of the NumPy API have PEP-484-style type annotations. In
+Large parts of the NumPy API have :pep:`484`-style type annotations. In
 addition a number of type aliases are available to users, most prominently
 the two below:
