diff options
Diffstat (limited to 'numpy')
53 files changed, 4776 insertions, 992 deletions
diff --git a/numpy/_pytesttester.py b/numpy/_pytesttester.py index 33fee9a14..813e069a4 100644 --- a/numpy/_pytesttester.py +++ b/numpy/_pytesttester.py @@ -6,7 +6,7 @@ boiler plate for doing that is to put the following in the module ``__init__.py`` file:: from numpy._pytesttester import PytestTester - test = PytestTester(__name__).test + test = PytestTester(__name__) del PytestTester diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt index 1868610f4..2d3a65391 100644 --- a/numpy/core/code_generators/cversions.txt +++ b/numpy/core/code_generators/cversions.txt @@ -50,8 +50,9 @@ # Version 13 (NumPy 1.17) No change. # Version 13 (NumPy 1.18) No change. # Version 13 (NumPy 1.19) No change. -# Version 13 (NumPy 1.20) No change. 0x0000000d = 5b0e8bbded00b166125974fc71e80a33 -# Version 14 (NumPy 1.19) DType related API additions +# Version 14 (NumPy 1.20) +# DType related API additions. +# A new field was added to the end of PyArrayObject_fields. 0x0000000e = 17a0f366e55ec05e5c5c149123478452 diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index 856db0410..ca6a22828 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -26,6 +26,7 @@ API_FILES = [join('multiarray', 'alloc.c'), join('multiarray', 'array_assign_array.c'), join('multiarray', 'array_assign_scalar.c'), join('multiarray', 'array_coercion.c'), + join('multiarray', 'array_method.c'), join('multiarray', 'arrayobject.c'), join('multiarray', 'arraytypes.c.src'), join('multiarray', 'buffer.c'), diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index d65e26827..efb052bc2 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -1375,7 +1375,7 @@ def resize(a, new_shape): reshaped_array : ndarray The new array is formed from the data in the old array, repeated if necessary to fill out the required number of elements. 
The - data are repeated in the order that they are stored in memory. + data are repeated iterating over the array in C-order. See Also -------- @@ -1392,11 +1392,11 @@ def resize(a, new_shape): Warning: This functionality does **not** consider axes separately, i.e. it does not apply interpolation/extrapolation. - It fills the return array with the required number of elements, taken - from `a` as they are laid out in memory, disregarding strides and axes. - (This is in case the new shape is smaller. For larger, see above.) - This functionality is therefore not suitable to resize images, - or data where each axis represents a separate and distinct entity. + It fills the return array with the required number of elements, iterating + over `a` in C-order, disregarding axes (and cycling back from the start if + the new shape is larger). This functionality is therefore not suitable to + resize images, or data where each axis represents a separate and distinct + entity. Examples -------- diff --git a/numpy/core/include/numpy/arrayscalars.h b/numpy/core/include/numpy/arrayscalars.h index b282a2cd4..14a31988f 100644 --- a/numpy/core/include/numpy/arrayscalars.h +++ b/numpy/core/include/numpy/arrayscalars.h @@ -149,6 +149,7 @@ typedef struct { PyArray_Descr *descr; int flags; PyObject *base; + void *_buffer_info; /* private buffer info, tagged to allow warning */ } PyVoidScalarObject; /* Macros diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 6bf54938f..63e8bf974 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -210,6 +210,7 @@ typedef enum { /* For specifying allowed casting in operations which support it */ typedef enum { + _NPY_ERROR_OCCURRED_IN_CAST = -1, /* Only allow identical types */ NPY_NO_CASTING=0, /* Allow identical and byte swapped types */ @@ -219,7 +220,14 @@ typedef enum { /* Allow safe casts or casts within the same kind */ NPY_SAME_KIND_CASTING=3, /* 
Allow any casts */ - NPY_UNSAFE_CASTING=4 + NPY_UNSAFE_CASTING=4, + /* + * Flag to allow signalling that a cast is a view, this flag is not + * valid when requesting a cast of specific safety. + * _NPY_CAST_IS_VIEW|NPY_EQUIV_CASTING means the same as NPY_NO_CASTING. + */ + // TODO-DTYPES: Needs to be documented. + _NPY_CAST_IS_VIEW = 1 << 16, } NPY_CASTING; typedef enum { @@ -701,6 +709,7 @@ typedef struct tagPyArrayObject_fields { int flags; /* For weak references */ PyObject *weakreflist; + void *_buffer_info; /* private buffer info, tagged to allow warning */ } PyArrayObject_fields; /* @@ -720,7 +729,18 @@ typedef struct tagPyArrayObject { } PyArrayObject; #endif -#define NPY_SIZEOF_PYARRAYOBJECT (sizeof(PyArrayObject_fields)) +/* + * Removed 2020-Nov-25, NumPy 1.20 + * #define NPY_SIZEOF_PYARRAYOBJECT (sizeof(PyArrayObject_fields)) + * + * The above macro was removed as it gave a false sense of a stable ABI + * with respect to the structures size. If you require a runtime constant, + * you can use `PyArray_Type.tp_basicsize` instead. Otherwise, please + * see the PyArrayObject documentation or ask the NumPy developers for + * information on how to correctly replace the macro in a way that is + * compatible with multiple NumPy versions. + */ + /* Array Flags Object */ typedef struct PyArrayFlagsObject { @@ -1900,6 +1920,12 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size, default_descr_function *default_descr; common_dtype_function *common_dtype; common_instance_function *common_instance; + /* + * Dictionary of ArrayMethods representing most possible casts + * (structured and object are exceptions). + * This should potentially become a weak mapping in the future. 
+ */ + PyObject *castingimpls; }; #endif /* NPY_INTERNAL_BUILD */ diff --git a/numpy/core/include/numpy/numpyconfig.h b/numpy/core/include/numpy/numpyconfig.h index 8eaf446b7..a1b1de0ef 100644 --- a/numpy/core/include/numpy/numpyconfig.h +++ b/numpy/core/include/numpy/numpyconfig.h @@ -41,6 +41,7 @@ #define NPY_1_17_API_VERSION 0x00000008 #define NPY_1_18_API_VERSION 0x00000008 #define NPY_1_19_API_VERSION 0x00000008 -#define NPY_1_20_API_VERSION 0x00000008 +#define NPY_1_20_API_VERSION 0x0000000e +#define NPY_1_21_API_VERSION 0x0000000e #endif diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 6ada03f73..2ec5e1a64 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -23,6 +23,11 @@ NPY_RELAXED_STRIDES_CHECKING = (os.environ.get('NPY_RELAXED_STRIDES_CHECKING', " NPY_RELAXED_STRIDES_DEBUG = (os.environ.get('NPY_RELAXED_STRIDES_DEBUG', "0") != "0") NPY_RELAXED_STRIDES_DEBUG = NPY_RELAXED_STRIDES_DEBUG and NPY_RELAXED_STRIDES_CHECKING +# Set to True to use the new casting implementation as much as implemented. +# Allows running the full test suit to exercise the new machinery until +# it is used as default and the old version is eventually deleted. +NPY_USE_NEW_CASTINGIMPL = os.environ.get('NPY_USE_NEW_CASTINGIMPL', "0") != "0" + # XXX: ugly, we use a class to avoid calling twice some expensive functions in # config.h/numpyconfig.h. 
I don't see a better way because distutils force # config.h generation inside an Extension class, and as such sharing @@ -468,6 +473,10 @@ def configuration(parent_package='',top_path=None): if NPY_RELAXED_STRIDES_DEBUG: moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) + # Use the new experimental casting implementation in NumPy 1.20: + if NPY_USE_NEW_CASTINGIMPL: + moredefs.append(('NPY_USE_NEW_CASTINGIMPL', 1)) + # Get long double representation rep = check_long_double_representation(config_cmd) moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) @@ -769,6 +778,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'arraytypes.h'), join('src', 'multiarray', 'arrayfunction_override.h'), join('src', 'multiarray', 'array_coercion.h'), + join('src', 'multiarray', 'array_method.h'), join('src', 'multiarray', 'npy_buffer.h'), join('src', 'multiarray', 'calculation.h'), join('src', 'multiarray', 'common.h'), @@ -784,6 +794,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'getset.h'), join('src', 'multiarray', 'hashdescr.h'), join('src', 'multiarray', 'iterators.h'), + join('src', 'multiarray', 'legacy_dtype_implementation.h'), join('src', 'multiarray', 'mapping.h'), join('src', 'multiarray', 'methods.h'), join('src', 'multiarray', 'multiarraymodule.h'), @@ -824,6 +835,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'arrayobject.c'), join('src', 'multiarray', 'arraytypes.c.src'), join('src', 'multiarray', 'array_coercion.c'), + join('src', 'multiarray', 'array_method.c'), join('src', 'multiarray', 'array_assign_scalar.c'), join('src', 'multiarray', 'array_assign_array.c'), join('src', 'multiarray', 'arrayfunction_override.c'), @@ -850,6 +862,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'hashdescr.c'), join('src', 'multiarray', 'item_selection.c'), join('src', 'multiarray', 'iterators.c'), + join('src', 'multiarray', 
'legacy_dtype_implementation.c'), join('src', 'multiarray', 'lowlevel_strided_loops.c.src'), join('src', 'multiarray', 'mapping.c'), join('src', 'multiarray', 'methods.c'), diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index f15425c87..ba3e215b3 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -40,7 +40,8 @@ C_ABI_VERSION = 0x01000009 # 0x0000000c - 1.14.x # 0x0000000c - 1.15.x # 0x0000000d - 1.16.x -# 0x0000000e - 1.19.x +# 0x0000000d - 1.19.x +# 0x0000000e - 1.20.x C_API_VERSION = 0x0000000e class MismatchCAPIWarning(Warning): diff --git a/numpy/core/src/common/npy_config.h b/numpy/core/src/common/npy_config.h index 27328aa73..61cc3c7f1 100644 --- a/numpy/core/src/common/npy_config.h +++ b/numpy/core/src/common/npy_config.h @@ -19,6 +19,15 @@ #endif +/* Disable broken functions on z/OS */ +#if defined (__MVS__) + +#undef HAVE_POWF +#undef HAVE_EXPF +#undef HAVE___THREAD + +#endif + /* Disable broken MS math functions */ #if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(__MINGW32_VERSION) diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index 421b03f93..c0d2f1967 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -373,4 +373,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step, NPY_NO_EXPORT PyArray_Descr * find_object_datetime_type(PyObject *obj, int type_num); +NPY_NO_EXPORT int +PyArray_InitializeDatetimeCasts(void); + #endif diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index 5b6b6dc78..3811e87a8 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -9,6 +9,7 @@ #include "common.h" #include "mem_overlap.h" #include "npy_extint128.h" +#include "array_method.h" #if defined(MS_WIN32) || defined(__CYGWIN__) #define EXPORT(x) __declspec(dllexport) x @@ -36,6 +37,7 @@ 
IsPythonScalar(PyObject * dummy, PyObject *args) #include "npy_pycompat.h" + /** Function to test calling via ctypes */ EXPORT(void*) forward_pointer(void *x) { @@ -684,6 +686,39 @@ create_custom_field_dtype(PyObject *NPY_UNUSED(mod), PyObject *args) } +PyObject * +corrupt_or_fix_bufferinfo(PyObject *dummy, PyObject *obj) +{ + void **buffer_info_ptr; + if (PyArray_Check(obj)) { + buffer_info_ptr = &((PyArrayObject_fields *)obj)->_buffer_info; + } + else if (PyArray_IsScalar(obj, Void)) { + buffer_info_ptr = &((PyVoidScalarObject *)obj)->_buffer_info; + } + else { + PyErr_SetString(PyExc_TypeError, + "argument must be an array or void scalar"); + return NULL; + } + if (*buffer_info_ptr == NULL) { + /* set to an invalid value (as a subclass might accidentally) */ + *buffer_info_ptr = obj; + assert(((uintptr_t)obj & 7) == 0); + } + else if (*buffer_info_ptr == obj) { + /* Reset to a NULL (good value) */ + *buffer_info_ptr = NULL; + } + else { + PyErr_SetString(PyExc_TypeError, + "buffer was already exported, this test doesn't support that"); + return NULL; + } + Py_RETURN_NONE; +} + + /* check no elison for avoided increfs */ static PyObject * incref_elide(PyObject *dummy, PyObject *args) @@ -977,6 +1012,79 @@ get_c_wrapping_array(PyObject* NPY_UNUSED(self), PyObject* arg) } +static PyObject * +get_all_cast_information(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(args)) +{ + PyObject *result = PyList_New(0); + if (result == NULL) { + return NULL; + } + PyObject *classes = PyObject_CallMethod( + (PyObject *)&PyArrayDescr_Type, "__subclasses__", ""); + if (classes == NULL) { + return NULL; + } + Py_SETREF(classes, PySequence_Fast(classes, NULL)); + if (classes == NULL) { + goto fail; + } + + Py_ssize_t nclass = PySequence_Length(classes); + for (Py_ssize_t i = 0; i < nclass; i++) { + PyArray_DTypeMeta *from_dtype = ( + (PyArray_DTypeMeta *)PySequence_Fast_GET_ITEM(classes, i)); + if (from_dtype->abstract) { + /* + * TODO: In principle probably needs to recursively 
check this, + * also we may allow casts to abstract dtypes at some point. + */ + continue; + } + + PyObject *to_dtype, *cast_obj; + Py_ssize_t pos = 0; + + while (PyDict_Next(from_dtype->castingimpls, &pos, &to_dtype, &cast_obj)) { + if (cast_obj == Py_None) { + continue; + } + PyArrayMethodObject *cast = (PyArrayMethodObject *)cast_obj; + + /* Pass some information about this cast out! */ + PyObject *cast_info = Py_BuildValue("{sOsOsisisisisisssi}", + "from", from_dtype, + "to", to_dtype, + "legacy", (cast->name != NULL && + strncmp(cast->name, "legacy_", 7) == 0), + "casting", cast->casting & ~_NPY_CAST_IS_VIEW, + "requires_pyapi", cast->flags & NPY_METH_REQUIRES_PYAPI, + "supports_unaligned", + cast->flags & NPY_METH_SUPPORTS_UNALIGNED, + "no_floatingpoint_errors", + cast->flags & NPY_METH_NO_FLOATINGPOINT_ERRORS, + "name", cast->name, + "cast_is_view", + cast->casting & _NPY_CAST_IS_VIEW); + if (cast_info == NULL) { + goto fail; + } + int res = PyList_Append(result, cast_info); + Py_DECREF(cast_info); + if (res < 0) { + goto fail; + } + } + } + Py_DECREF(classes); + return result; + + fail: + Py_XDECREF(classes); + Py_XDECREF(result); + return NULL; +} + + /* * Test C-api level item getting. 
*/ @@ -2010,6 +2118,18 @@ getset_numericops(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) return ret; } + +static PyObject * +uses_new_casts(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ +#if NPY_USE_NEW_CASTINGIMPL + Py_RETURN_TRUE; +#else + Py_RETURN_FALSE; +#endif +} + + static PyObject * run_byteorder_converter(PyObject* NPY_UNUSED(self), PyObject *args) { @@ -2113,8 +2233,8 @@ run_casting_converter(PyObject* NPY_UNUSED(self), PyObject *args) case NPY_SAFE_CASTING: return PyUnicode_FromString("NPY_SAFE_CASTING"); case NPY_SAME_KIND_CASTING: return PyUnicode_FromString("NPY_SAME_KIND_CASTING"); case NPY_UNSAFE_CASTING: return PyUnicode_FromString("NPY_UNSAFE_CASTING"); + default: return PyLong_FromLong(casting); } - return PyLong_FromLong(casting); } static PyObject * @@ -2158,6 +2278,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"create_custom_field_dtype", create_custom_field_dtype, METH_VARARGS, NULL}, + {"corrupt_or_fix_bufferinfo", + corrupt_or_fix_bufferinfo, + METH_O, NULL}, {"incref_elide", incref_elide, METH_VARARGS, NULL}, @@ -2194,6 +2317,12 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"get_c_wrapping_array", get_c_wrapping_array, METH_O, NULL}, + {"get_all_cast_information", + get_all_cast_information, + METH_NOARGS, + "Return a list with info on all available casts. 
Some of the info" + "may differ for an actual cast if it uses value-based casting " + "(flexible types)."}, {"array_indexing", array_indexing, METH_VARARGS, NULL}, @@ -2254,6 +2383,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"getset_numericops", getset_numericops, METH_NOARGS, NULL}, + {"uses_new_casts", + uses_new_casts, + METH_NOARGS, NULL}, /**begin repeat * #name = cabs, carg# */ diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c new file mode 100644 index 000000000..cae452454 --- /dev/null +++ b/numpy/core/src/multiarray/array_method.c @@ -0,0 +1,614 @@ +/* + * This file implements an abstraction layer for "Array methods", which + * work with a specific DType class input and provide low-level C function + * pointers to do fast operations on the given input functions. + * It thus adds an abstraction layer around individual ufunc loops. + * + * Unlike methods, a ArrayMethod can have multiple inputs and outputs. + * This has some serious implication for garbage collection, and as far + * as I (@seberg) understands, it is not possible to always guarantee correct + * cyclic garbage collection of dynamically created DTypes with methods. + * The keyword (or rather the solution) for this seems to be an "ephemeron" + * which I believe should allow correct garbage collection but seems + * not implemented in Python at this time. + * The vast majority of use-cases will not require correct garbage collection. + * Some use cases may require the user to be careful. + * + * Generally there are two main ways to solve this issue: + * + * 1. A method with a single input (or inputs of all the same DTypes) can + * be "owned" by that DType (it becomes unusable when the DType is deleted). + * This holds especially for all casts, which must have a defined output + * DType and must hold on to it strongly. + * 2. A method which can infer the output DType(s) from the input types does + * not need to keep the output type alive. 
(It can use NULL for the type, + * or an abstract base class which is known to be persistent.) + * It is then sufficient for a ufunc (or other owner) to only hold a + * weak reference to the input DTypes. + */ + + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include <npy_pycompat.h> +#include "arrayobject.h" +#include "array_method.h" +#include "dtypemeta.h" +#include "convert_datatype.h" + + +/* + * The default descriptor resolution function. The logic is as follows: + * + * 1. The output is ensured to be canonical (currently native byte order), + * if it is of the correct DType. + * 2. If any DType is was not defined, it is replaced by the common DType + * of all inputs. (If that common DType is parametric, this is an error.) + * + * We could allow setting the output descriptors specifically to simplify + * this step. + */ +static NPY_CASTING +default_resolve_descriptors( + PyArrayMethodObject *method, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **input_descrs, + PyArray_Descr **output_descrs) +{ + int nin = method->nin; + int nout = method->nout; + int all_defined = 1; + + for (int i = 0; i < nin + nout; i++) { + PyArray_DTypeMeta *dtype = dtypes[i]; + if (dtype == NULL) { + output_descrs[i] = NULL; + all_defined = 0; + continue; + } + if (NPY_DTYPE(input_descrs[i]) == dtype) { + output_descrs[i] = ensure_dtype_nbo(input_descrs[i]); + } + else { + output_descrs[i] = dtype->default_descr(dtype); + } + if (NPY_UNLIKELY(output_descrs[i] == NULL)) { + goto fail; + } + } + if (all_defined) { + return method->casting; + } + + if (NPY_UNLIKELY(nin == 0 || dtypes[0] == NULL)) { + /* Registration should reject this, so this would be indicates a bug */ + PyErr_SetString(PyExc_RuntimeError, + "Invalid use of default resolver without inputs or with " + "input or output DType incorrectly missing."); + goto fail; + } + /* We find the common dtype of all inputs, and use it for the unknowns */ + PyArray_DTypeMeta *common_dtype = dtypes[0]; + 
assert(common_dtype != NULL); + for (int i = 1; i < nin; i++) { + Py_SETREF(common_dtype, PyArray_CommonDType(common_dtype, dtypes[i])); + if (common_dtype == NULL) { + goto fail; + } + } + for (int i = nin; i < nin + nout; i++) { + if (output_descrs[i] != NULL) { + continue; + } + if (NPY_DTYPE(input_descrs[i]) == common_dtype) { + output_descrs[i] = ensure_dtype_nbo(input_descrs[i]); + } + else { + output_descrs[i] = common_dtype->default_descr(common_dtype); + } + if (NPY_UNLIKELY(output_descrs[i] == NULL)) { + goto fail; + } + } + + return method->casting; + + fail: + for (int i = 0; i < nin + nout; i++) { + Py_XDECREF(output_descrs[i]); + } + return -1; +} + + +/** + * The default method to fetch the correct loop for a cast or ufunc + * (at the time of writing only casts). + * The default version can return loops explicitly registered during method + * creation. It does specialize contiguous loops, although has to check + * all descriptors itemsizes for this. + * + * @param context + * @param aligned + * @param move_references UNUSED. + * @param strides + * @param descriptors + * @param out_loop + * @param out_transferdata + * @param flags + * @return 0 on success -1 on failure. + */ +static int +default_get_strided_loop( + PyArrayMethod_Context *NPY_UNUSED(context), + int NPY_UNUSED(aligned), int NPY_UNUSED(move_references), + npy_intp *NPY_UNUSED(strides), + PyArray_StridedUnaryOp **NPY_UNUSED(out_loop), + NpyAuxData **NPY_UNUSED(out_transferdata), + NPY_ARRAYMETHOD_FLAGS *NPY_UNUSED(flags)) +{ + PyErr_SetString(PyExc_NotImplementedError, + "default loop getter is not implemented"); + return -1; +} + + +/** + * Validate that the input is usable to create a new ArrayMethod. + * + * @param spec + * @return 0 on success -1 on error. 
+ */ +static int +validate_spec(PyArrayMethod_Spec *spec) +{ + int nargs = spec->nin + spec->nout; + /* Check the passed spec for invalid fields/values */ + if (spec->nin < 0 || spec->nout < 0 || nargs > NPY_MAXARGS) { + PyErr_Format(PyExc_ValueError, + "ArrayMethod inputs and outputs must be greater zero and" + "not exceed %d. (method: %s)", NPY_MAXARGS, spec->name); + return -1; + } + switch (spec->casting & ~_NPY_CAST_IS_VIEW) { + case NPY_NO_CASTING: + case NPY_EQUIV_CASTING: + case NPY_SAFE_CASTING: + case NPY_SAME_KIND_CASTING: + case NPY_UNSAFE_CASTING: + break; + default: + PyErr_Format(PyExc_TypeError, + "ArrayMethod has invalid casting `%d`. (method: %s)", + spec->casting, spec->name); + return -1; + } + + for (int i = 0; i < nargs; i++) { + if (spec->dtypes[i] == NULL && i < spec->nin) { + PyErr_Format(PyExc_TypeError, + "ArrayMethod must have well defined input DTypes. " + "(method: %s)", spec->name); + return -1; + } + if (!PyObject_TypeCheck(spec->dtypes[i], &PyArrayDTypeMeta_Type)) { + PyErr_Format(PyExc_TypeError, + "ArrayMethod provided object %R is not a DType." + "(method: %s)", spec->dtypes[i], spec->name); + return -1; + } + if (spec->dtypes[i]->abstract && i < spec->nin) { + PyErr_Format(PyExc_TypeError, + "abstract DType %S are currently not allowed for inputs." + "(method: %s defined at %s)", spec->dtypes[i], spec->name); + return -1; + } + } + return 0; +} + + +/** + * Initialize a new BoundArrayMethodObject from slots. Slots which are + * not provided may be filled with defaults. + * + * @param res The new PyBoundArrayMethodObject to be filled. + * @param spec The specification list passed by the user. + * @param private Private flag to limit certain slots to use in NumPy. 
+ * @return -1 on error 0 on success + */ +static int +fill_arraymethod_from_slots( + PyBoundArrayMethodObject *res, PyArrayMethod_Spec *spec, + int private) +{ + PyArrayMethodObject *meth = res->method; + + /* Set the defaults */ + meth->get_strided_loop = &default_get_strided_loop; + meth->resolve_descriptors = &default_resolve_descriptors; + + /* Fill in the slots passed by the user */ + /* + * TODO: This is reasonable for now, but it would be nice to find a + * shorter solution, and add some additional error checking (e.g. + * the same slot used twice). Python uses an array of slot offsets. + */ + for (PyType_Slot *slot = &spec->slots[0]; slot->slot != 0; slot++) { + switch (slot->slot) { + case NPY_METH_resolve_descriptors: + meth->resolve_descriptors = slot->pfunc; + continue; + case NPY_METH_get_loop: + if (private) { + /* Only allow override for private functions initially */ + meth->get_strided_loop = slot->pfunc; + continue; + } + break; + case NPY_METH_strided_loop: + meth->strided_loop = slot->pfunc; + continue; + case NPY_METH_contiguous_loop: + meth->contiguous_loop = slot->pfunc; + continue; + case NPY_METH_unaligned_strided_loop: + meth->unaligned_strided_loop = slot->pfunc; + continue; + case NPY_METH_unaligned_contiguous_loop: + meth->unaligned_contiguous_loop = slot->pfunc; + continue; + default: + break; + } + PyErr_Format(PyExc_RuntimeError, + "invalid slot number %d to ArrayMethod: %s", + slot->slot, spec->name); + return -1; + } + + /* Check whether the slots are valid: */ + if (meth->resolve_descriptors == &default_resolve_descriptors) { + for (int i = 0; i < meth->nin + meth->nout; i++) { + if (res->dtypes[i] == NULL) { + if (i < meth->nin) { + PyErr_Format(PyExc_TypeError, + "All input DTypes must be specified when using " + "the default `resolve_descriptors` function. 
" + "(method: %s)", spec->name); + return -1; + } + else if (meth->nin == 0) { + PyErr_Format(PyExc_TypeError, + "Must specify output DTypes or use custom " + "`resolve_descriptors` when there are no inputs. " + "(method: %s defined at %s)", spec->name); + return -1; + } + } + if (i >= meth->nin && res->dtypes[i]->parametric) { + PyErr_Format(PyExc_TypeError, + "must provide a `resolve_descriptors` function if any " + "output DType is parametric. (method: %s)", + spec->name); + return -1; + } + } + } + if (meth->get_strided_loop != &default_get_strided_loop) { + /* Do not check the actual loop fields. */ + return 0; + } + + /* Check whether the provided loops make sense. */ + if (meth->strided_loop == NULL) { + PyErr_Format(PyExc_TypeError, + "Must provide a strided inner loop function. (method: %s)", + spec->name); + return -1; + } + if (meth->contiguous_loop == NULL) { + meth->contiguous_loop = meth->strided_loop; + } + if (meth->unaligned_contiguous_loop != NULL && + meth->unaligned_strided_loop == NULL) { + PyErr_Format(PyExc_TypeError, + "Must provide unaligned strided inner loop when providing " + "a contiguous version. (method: %s)", spec->name); + return -1; + } + if ((meth->unaligned_strided_loop == NULL) != + !(meth->flags & NPY_METH_SUPPORTS_UNALIGNED)) { + PyErr_Format(PyExc_TypeError, + "Must provide unaligned strided inner loop when providing " + "a contiguous version. (method: %s)", spec->name); + return -1; + } + + return 0; +} + + +/** + * Create a new ArrayMethod (internal version). + * + * @param name A name for the individual method, may be NULL. + * @param spec A filled context object to pass generic information about + * the method (such as usually needing the API, and the DTypes). + * Unused fields must be NULL. + * @param slots Slots with the correct pair of IDs and (function) pointers. + * @param private Some slots are currently considered private, if not true, + * these will be rejected. + * + * @returns A new (bound) ArrayMethod object. 
+ */ +NPY_NO_EXPORT PyBoundArrayMethodObject * +PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private) +{ + int nargs = spec->nin + spec->nout; + + if (spec->name == NULL) { + spec->name = "<unknown>"; + } + + if (validate_spec(spec) < 0) { + return NULL; + } + + PyBoundArrayMethodObject *res; + res = PyObject_New(PyBoundArrayMethodObject, &PyBoundArrayMethod_Type); + if (res == NULL) { + return NULL; + } + res->method = NULL; + + res->dtypes = PyMem_Malloc(sizeof(PyArray_DTypeMeta *) * nargs); + if (res->dtypes == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + for (int i = 0; i < nargs ; i++) { + Py_XINCREF(spec->dtypes[i]); + res->dtypes[i] = spec->dtypes[i]; + } + + res->method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (res->method == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + memset((char *)(res->method) + sizeof(PyObject), 0, + sizeof(PyArrayMethodObject) - sizeof(PyObject)); + + res->method->nin = spec->nin; + res->method->nout = spec->nout; + res->method->flags = spec->flags; + res->method->casting = spec->casting; + if (fill_arraymethod_from_slots(res, spec, private) < 0) { + Py_DECREF(res); + return NULL; + } + + ssize_t length = strlen(spec->name); + res->method->name = PyMem_Malloc(length + 1); + if (res->method->name == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + strcpy(res->method->name, spec->name); + + return res; +} + + +static void +arraymethod_dealloc(PyObject *self) +{ + PyArrayMethodObject *meth; + meth = ((PyArrayMethodObject *)self); + + PyMem_Free(meth->name); + + Py_TYPE(self)->tp_free(self); +} + + +NPY_NO_EXPORT PyTypeObject PyArrayMethod_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "numpy._ArrayMethod", + .tp_basicsize = sizeof(PyArrayMethodObject), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_dealloc = arraymethod_dealloc, +}; + + + +static PyObject * +boundarraymethod_repr(PyBoundArrayMethodObject *self) +{ + int nargs = 
self->method->nin + self->method->nout; + PyObject *dtypes = PyTuple_New(nargs); + if (dtypes == NULL) { + return NULL; + } + for (int i = 0; i < nargs; i++) { + Py_INCREF(self->dtypes[i]); + PyTuple_SET_ITEM(dtypes, i, (PyObject *)self->dtypes[i]); + } + return PyUnicode_FromFormat( + "<np._BoundArrayMethod `%s` for dtypes %S>", + self->method->name, dtypes); +} + + +static void +boundarraymethod_dealloc(PyObject *self) +{ + PyBoundArrayMethodObject *meth; + meth = ((PyBoundArrayMethodObject *)self); + int nargs = meth->method->nin + meth->method->nout; + + for (int i = 0; i < nargs; i++) { + Py_XDECREF(meth->dtypes[i]); + } + PyMem_Free(meth->dtypes); + + Py_XDECREF(meth->method); + + Py_TYPE(self)->tp_free(self); +} + + +/* + * Calls resolve_descriptors() and returns the casting level and the resolved + * descriptors as a tuple. If the operation is impossible returns (-1, None). + * May raise an error, but usually should not. + * The function validates the casting attribute compared to the returned + * casting level. 
+ */ +static PyObject * +boundarraymethod__resolve_descripors( + PyBoundArrayMethodObject *self, PyObject *descr_tuple) +{ + int nin = self->method->nin; + int nout = self->method->nout; + + PyArray_Descr *given_descrs[NPY_MAXARGS]; + PyArray_Descr *loop_descrs[NPY_MAXARGS]; + + if (!PyTuple_CheckExact(descr_tuple) || + PyTuple_Size(descr_tuple) != nin + nout) { + PyErr_Format(PyExc_ValueError, + "_resolve_descriptors() takes exactly one tuple with as many " + "elements as the method takes arguments (%d+%d).", nin, nout); + return NULL; + } + + for (int i = 0; i < nin + nout; i++) { + PyObject *tmp = PyTuple_GetItem(descr_tuple, i); + if (tmp == NULL) { + return NULL; + } + else if (tmp == Py_None) { + if (i < nin) { + PyErr_SetString(PyExc_ValueError, + "only output dtypes may be omitted (set to None)."); + return NULL; + } + given_descrs[i] = NULL; + } + else if (PyArray_DescrCheck(tmp)) { + if (Py_TYPE(tmp) != (PyTypeObject *)self->dtypes[i]) { + PyErr_Format(PyExc_ValueError, + "input dtype %S was not an exact instance of the bound " + "DType class %S.", tmp, self->dtypes[i]); + return NULL; + } + given_descrs[i] = (PyArray_Descr *)tmp; + } + else { + PyErr_SetString(PyExc_TypeError, + "dtype tuple can only contain dtype instances or None."); + return NULL; + } + } + + NPY_CASTING casting = self->method->resolve_descriptors( + self->method, self->dtypes, given_descrs, loop_descrs); + + if (casting < 0 && PyErr_Occurred()) { + return NULL; + } + else if (casting < 0) { + return Py_BuildValue("iO", casting, Py_None); + } + + PyObject *result_tuple = PyTuple_New(nin + nout); + if (result_tuple == NULL) { + return NULL; + } + for (int i = 0; i < nin + nout; i++) { + /* transfer ownership to the tuple. */ + PyTuple_SET_ITEM(result_tuple, i, (PyObject *)loop_descrs[i]); + } + + /* + * The casting flags should be the most generic casting level (except the + * cast-is-view flag. If no input is parametric, it must match exactly. 
+ */ + int parametric = 0; + for (int i = 0; i < nin + nout; i++) { + if (self->dtypes[i]->parametric) { + parametric = 1; + break; + } + } + if (!parametric) { + /* + * Non-parametric can only mismatch if it switches from no to equiv + * (e.g. due to byteorder changes). + */ + if (self->method->casting != (casting & ~_NPY_CAST_IS_VIEW) && + !(self->method->casting == NPY_NO_CASTING && + casting == NPY_EQUIV_CASTING)) { + PyErr_Format(PyExc_RuntimeError, + "resolve_descriptors cast level did not match stored one " + "(expected %d, got %d) for method %s", + self->method->casting, (casting & ~_NPY_CAST_IS_VIEW), + self->method->name); + Py_DECREF(result_tuple); + return NULL; + } + } + else { + NPY_CASTING cast = casting & ~_NPY_CAST_IS_VIEW; + if (cast != PyArray_MinCastSafety(cast, self->method->casting)) { + PyErr_Format(PyExc_RuntimeError, + "resolve_descriptors cast level did not match stored one " + "(expected %d, got %d) for method %s", + self->method->casting, (casting & ~_NPY_CAST_IS_VIEW), + self->method->name); + Py_DECREF(result_tuple); + return NULL; + } + } + + return Py_BuildValue("iN", casting, result_tuple); +} + + +PyMethodDef boundarraymethod_methods[] = { + {"_resolve_descriptors", (PyCFunction)boundarraymethod__resolve_descripors, + METH_O, "Resolve the given dtypes."}, + {NULL, 0, 0, NULL}, +}; + + +static PyObject * +boundarraymethod__supports_unaligned(PyBoundArrayMethodObject *self) +{ + return PyBool_FromLong(self->method->flags & NPY_METH_SUPPORTS_UNALIGNED); +} + + +PyGetSetDef boundarraymethods_getters[] = { + {"_supports_unaligned", + (getter)boundarraymethod__supports_unaligned, NULL, + "whether the method supports unaligned inputs/outputs.", NULL}, + {NULL, NULL, NULL, NULL, NULL}, +}; + + +NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "numpy._BoundArrayMethod", + .tp_basicsize = sizeof(PyBoundArrayMethodObject), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_repr = 
(reprfunc)boundarraymethod_repr, + .tp_dealloc = boundarraymethod_dealloc, + .tp_methods = boundarraymethod_methods, + .tp_getset = boundarraymethods_getters, +}; diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h new file mode 100644 index 000000000..15ea948ce --- /dev/null +++ b/numpy/core/src/multiarray/array_method.h @@ -0,0 +1,150 @@ +#ifndef _NPY_ARRAY_METHOD_H +#define _NPY_ARRAY_METHOD_H + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#include <Python.h> +#include <numpy/ndarraytypes.h> +#include <lowlevel_strided_loops.h> + + +typedef enum { + /* Flag for whether the GIL is required */ + NPY_METH_REQUIRES_PYAPI = 1 << 1, + /* + * Some functions cannot set floating point error flags, this flag + * gives us the option (not requirement) to skip floating point error + * setup/check. No function should set error flags and ignore them + * since it would interfere with chaining operations (e.g. casting). + */ + NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2, + /* Whether the method supports unaligned access (not runtime) */ + NPY_METH_SUPPORTS_UNALIGNED = 1 << 3, + + /* All flags which can change at runtime */ + NPY_METH_RUNTIME_FLAGS = ( + NPY_METH_REQUIRES_PYAPI | + NPY_METH_NO_FLOATINGPOINT_ERRORS), +} NPY_ARRAYMETHOD_FLAGS; + + +struct PyArrayMethodObject_tag; + +/* + * This struct is specific to an individual (possibly repeated) call of + * the ArrayMethods strided operator, and as such is passed into the various + * methods of the ArrayMethod object (the resolve_descriptors function, + * the get_loop function and the individual lowlevel strided operator calls). + * It thus has to be persistent for one end-user call, and then be discarded. + * + * TODO: Before making this public, we should review which information should + * be stored on the Context/BoundArrayMethod vs. the ArrayMethod. + */ +typedef struct { + PyObject *caller; /* E.g. 
the original ufunc, may be NULL */ + struct PyArrayMethodObject_tag *method; + + /* Operand descriptors, filled in by resolve_descriptors */ + PyArray_Descr **descriptors; +} PyArrayMethod_Context; + + +typedef NPY_CASTING (resolve_descriptors_function)( + struct PyArrayMethodObject_tag *method, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs); + + +typedef int (get_loop_function)( + PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags); + + +/* + * This struct will be public and necessary for creating a new ArrayMethod + * object (casting and ufuncs). + * We could version the struct, although since we allow passing arbitrary + * data using the slots, and have flags, that may be enough? + * (See also PyBoundArrayMethodObject.) + */ +typedef struct { + const char *name; + int nin, nout; + NPY_CASTING casting; + NPY_ARRAYMETHOD_FLAGS flags; + PyArray_DTypeMeta **dtypes; + PyType_Slot *slots; +} PyArrayMethod_Spec; + + +/* + * Structure of the ArrayMethod. This structure should probably not be made + * public. If necessary, we can make certain operations on it public + * (e.g. to allow users indirect access to `get_strided_loop`). + * + * NOTE: In some cases, it may not be clear whether information should be + * stored here or on the bound version. E.g. `nin` and `nout` (and in the + * future the gufunc `signature`) is already stored on the ufunc so that + * storing these here duplicates the information. + */ +typedef struct PyArrayMethodObject_tag { + PyObject_HEAD + char *name; + int nin, nout; + /* Casting is normally "safe" for functions, but is important for casts */ + NPY_CASTING casting; + /* default flags. 
The get_strided_loop function can override these */ + NPY_ARRAYMETHOD_FLAGS flags; + resolve_descriptors_function *resolve_descriptors; + get_loop_function *get_strided_loop; + /* Typical loop functions (contiguous ones are used in current casts) */ + PyArray_StridedUnaryOp *strided_loop; + PyArray_StridedUnaryOp *contiguous_loop; + PyArray_StridedUnaryOp *unaligned_strided_loop; + PyArray_StridedUnaryOp *unaligned_contiguous_loop; +} PyArrayMethodObject; + + +/* + * We will sometimes have to create a ArrayMethod and allow passing it around, + * similar to `instance.method` returning a bound method, e.g. a function like + * `ufunc.resolve()` can return a bound object. + * The current main purpose of the BoundArrayMethod is that it holds on to the + * `dtypes` (the classes), so that the `ArrayMethod` (e.g. for casts) will + * not create references cycles. In principle, it could hold any information + * which is also stored on the ufunc (and thus does not need to be repeated + * on the `ArrayMethod` itself. + */ +typedef struct { + PyObject_HEAD + PyArray_DTypeMeta **dtypes; + PyArrayMethodObject *method; +} PyBoundArrayMethodObject; + + +extern NPY_NO_EXPORT PyTypeObject PyArrayMethod_Type; +extern NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type; + +/* + * SLOTS IDs For the ArrayMethod creation, one public, the IDs are fixed. + * TODO: Before making it public, consider adding a large constant to private + * slots. 
+ */ +#define NPY_METH_resolve_descriptors 1 +#define NPY_METH_get_loop 2 +#define NPY_METH_strided_loop 3 +#define NPY_METH_contiguous_loop 4 +#define NPY_METH_unaligned_strided_loop 5 +#define NPY_METH_unaligned_contiguous_loop 6 + + +NPY_NO_EXPORT PyBoundArrayMethodObject * +PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private); + +#endif /*_NPY_ARRAY_METHOD_H*/ diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 5da1b5f29..a2474d79f 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -434,7 +434,9 @@ array_dealloc(PyArrayObject *self) { PyArrayObject_fields *fa = (PyArrayObject_fields *)self; - _dealloc_cached_buffer_info((PyObject*)self); + if (_buffer_info_free(fa->_buffer_info, (PyObject *)self) < 0) { + PyErr_WriteUnraisable(NULL); + } if (fa->weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *)self); @@ -1745,7 +1747,7 @@ array_free(PyObject * v) NPY_NO_EXPORT PyTypeObject PyArray_Type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "numpy.ndarray", - .tp_basicsize = NPY_SIZEOF_PYARRAYOBJECT, + .tp_basicsize = sizeof(PyArrayObject_fields), /* methods */ .tp_dealloc = (destructor)array_dealloc, .tp_repr = (reprfunc)array_repr, diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c index 1f4f676ba..813850224 100644 --- a/numpy/core/src/multiarray/buffer.c +++ b/numpy/core/src/multiarray/buffer.c @@ -428,31 +428,23 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str, /* - * Global information about all active buffers + * Information about all active buffers is stored as a linked list on + * the ndarray. The initial pointer is currently tagged to have a chance of + * detecting incompatible subclasses. * * Note: because for backward compatibility we cannot define bf_releasebuffer, * we must manually keep track of the additional data required by the buffers. 
*/ /* Additional per-array data required for providing the buffer interface */ -typedef struct { +typedef struct _buffer_info_t_tag { char *format; int ndim; Py_ssize_t *strides; Py_ssize_t *shape; + struct _buffer_info_t_tag *next; } _buffer_info_t; -/* - * { id(array): [list of pointers to _buffer_info_t, the last one is latest] } - * - * Because shape, strides, and format can be different for different buffers, - * we may need to keep track of multiple buffer infos for each array. - * - * However, when none of them has changed, the same buffer info may be reused. - * - * Thread-safety is provided by GIL. - */ -static PyObject *_buffer_info_cache = NULL; /* Fill in the info structure */ static _buffer_info_t* @@ -564,6 +556,7 @@ _buffer_info_new(PyObject *obj, int flags) Py_DECREF(descr); info->format = NULL; } + info->next = NULL; return info; fail: @@ -596,145 +589,161 @@ _buffer_info_cmp(_buffer_info_t *a, _buffer_info_t *b) return 0; } -static void -_buffer_info_free(_buffer_info_t *info) -{ - if (info->format) { - PyObject_Free(info->format); - } - PyObject_Free(info); -} -/* Get buffer info from the global dictionary */ -static _buffer_info_t* -_buffer_get_info(PyObject *obj, int flags) +/* + * Tag the buffer info pointer by adding 2 (unless it is NULL to simplify + * object initialization). + * The linked list of buffer-infos was appended to the array struct in + * NumPy 1.20. Tagging the pointer gives us a chance to raise/print + * a useful error message instead of crashing hard if a C-subclass uses + * the same field. 
+ */ +static NPY_INLINE void * +buffer_info_tag(void *buffer_info) { - PyObject *key = NULL, *item_list = NULL, *item = NULL; - _buffer_info_t *info = NULL, *old_info = NULL; - - if (_buffer_info_cache == NULL) { - _buffer_info_cache = PyDict_New(); - if (_buffer_info_cache == NULL) { - return NULL; - } + if (buffer_info == NULL) { + return buffer_info; } - - /* Compute information */ - info = _buffer_info_new(obj, flags); - if (info == NULL) { - return NULL; + else { + return (void *)((uintptr_t)buffer_info + 3); } +} - /* Check if it is identical with an old one; reuse old one, if yes */ - key = PyLong_FromVoidPtr((void*)obj); - if (key == NULL) { - goto fail; - } - item_list = PyDict_GetItem(_buffer_info_cache, key); - - if (item_list != NULL) { - Py_ssize_t item_list_length = PyList_GET_SIZE(item_list); - Py_INCREF(item_list); - if (item_list_length > 0) { - item = PyList_GetItem(item_list, item_list_length - 1); - old_info = (_buffer_info_t*)PyLong_AsVoidPtr(item); - if (_buffer_info_cmp(info, old_info) != 0) { - old_info = NULL; /* Can't use this one, but possibly next */ - - if (item_list_length > 1 && info->ndim > 1) { - /* - * Some arrays are C- and F-contiguous and if they have more - * than one dimension, the buffer-info may differ between - * the two due to RELAXED_STRIDES_CHECKING. - * If we export both buffers, the first stored one may be - * the one for the other contiguity, so check both. - * This is generally very unlikely in all other cases, since - * in all other cases the first one will match unless array - * metadata was modified in-place (which is discouraged). - */ - item = PyList_GetItem(item_list, item_list_length - 2); - old_info = (_buffer_info_t*)PyLong_AsVoidPtr(item); - if (_buffer_info_cmp(info, old_info) != 0) { - old_info = NULL; - } - } - } - if (old_info != NULL) { - /* - * The two info->format are considered equal if one of them - * has no format set (meaning the format is arbitrary and can - * be modified). 
If the new info has a format, but we reuse - * the old one, this transfers the ownership to the old one. - */ - if (old_info->format == NULL) { - old_info->format = info->format; - info->format = NULL; - } - _buffer_info_free(info); - info = old_info; - } - } +static NPY_INLINE int +_buffer_info_untag( + void *tagged_buffer_info, _buffer_info_t **buffer_info, PyObject *obj) +{ + if (tagged_buffer_info == NULL) { + *buffer_info = NULL; + return 0; } - else { - item_list = PyList_New(0); - if (item_list == NULL) { - goto fail; - } - if (PyDict_SetItem(_buffer_info_cache, key, item_list) != 0) { - goto fail; - } + if (NPY_UNLIKELY(((uintptr_t)tagged_buffer_info & 0x7) != 3)) { + PyErr_Format(PyExc_RuntimeError, + "Object of type %S appears to be C subclassed NumPy array, " + "void scalar, or allocated in a non-standard way." + "NumPy reserves the right to change the size of these " + "structures. Projects are required to take this into account " + "by either recompiling against a specific NumPy version or " + "padding the struct and enforcing a maximum NumPy version.", + Py_TYPE(obj)); + return -1; } + *buffer_info = (void *)((uintptr_t)tagged_buffer_info - 3); + return 0; +} - if (info != old_info) { - /* Needs insertion */ - item = PyLong_FromVoidPtr((void*)info); - if (item == NULL) { - goto fail; + +/* + * NOTE: for backward compatibility (esp. with PyArg_ParseTuple("s#", ...)) + * we do *not* define bf_releasebuffer at all. + * + * Instead, any extra data allocated with the buffer is released only in + * array_dealloc. + * + * Ensuring that the buffer stays in place is taken care by refcounting; + * ndarrays do not reallocate if there are references to them, and a buffer + * view holds one reference. + * + * This is stored in the array's _buffer_info slot (currently as a void *). 
+ */ +static void +_buffer_info_free_untagged(void *_buffer_info) +{ + _buffer_info_t *next = _buffer_info; + while (next != NULL) { + _buffer_info_t *curr = next; + next = curr->next; + if (curr->format) { + PyObject_Free(curr->format); } - PyList_Append(item_list, item); - Py_DECREF(item); + /* Shape is allocated as part of info */ + PyObject_Free(curr); } +} - Py_DECREF(item_list); - Py_DECREF(key); - return info; -fail: - if (info != NULL && info != old_info) { - _buffer_info_free(info); +/* + * Checks whether the pointer is tagged, and then frees the cache list. + * (The tag check is only for transition due to changed structure size in 1.20) + */ +NPY_NO_EXPORT int +_buffer_info_free(void *buffer_info, PyObject *obj) +{ + _buffer_info_t *untagged_buffer_info; + if (_buffer_info_untag(buffer_info, &untagged_buffer_info, obj) < 0) { + return -1; } - Py_XDECREF(item_list); - Py_XDECREF(key); - return NULL; + _buffer_info_free_untagged(untagged_buffer_info); + return 0; } -/* Clear buffer info from the global dictionary */ -static void -_buffer_clear_info(PyObject *arr) + +/* + * Get the buffer info returning either the old one (passed in) or a new + * buffer info which adds holds on to (and thus replaces) the old one. 
+ */ +static _buffer_info_t* +_buffer_get_info(void **buffer_info_cache_ptr, PyObject *obj, int flags) { - PyObject *key, *item_list, *item; - _buffer_info_t *info; - int k; + _buffer_info_t *info = NULL; + _buffer_info_t *stored_info; /* First currently stored buffer info */ + + if (_buffer_info_untag(*buffer_info_cache_ptr, &stored_info, obj) < 0) { + return NULL; + } + _buffer_info_t *old_info = stored_info; - if (_buffer_info_cache == NULL) { - return; + /* Compute information (it would be nice to skip this in simple cases) */ + info = _buffer_info_new(obj, flags); + if (info == NULL) { + return NULL; } - key = PyLong_FromVoidPtr((void*)arr); - item_list = PyDict_GetItem(_buffer_info_cache, key); - if (item_list != NULL) { - for (k = 0; k < PyList_GET_SIZE(item_list); ++k) { - item = PyList_GET_ITEM(item_list, k); - info = (_buffer_info_t*)PyLong_AsVoidPtr(item); - _buffer_info_free(info); + if (old_info != NULL && _buffer_info_cmp(info, old_info) != 0) { + _buffer_info_t *next_info = old_info->next; + old_info = NULL; /* Can't use this one, but possibly next */ + + if (info->ndim > 1 && next_info != NULL) { + /* + * Some arrays are C- and F-contiguous and if they have more + * than one dimension, the buffer-info may differ between + * the two due to RELAXED_STRIDES_CHECKING. + * If we export both buffers, the first stored one may be + * the one for the other contiguity, so check both. + * This is generally very unlikely in all other cases, since + * in all other cases the first one will match unless array + * metadata was modified in-place (which is discouraged). + */ + if (_buffer_info_cmp(info, next_info) == 0) { + old_info = next_info; + } + } + } + if (old_info != NULL) { + /* + * The two info->format are considered equal if one of them + * has no format set (meaning the format is arbitrary and can + * be modified). If the new info has a format, but we reuse + * the old one, this transfers the ownership to the old one. 
+ */ + if (old_info->format == NULL) { + old_info->format = info->format; + info->format = NULL; } - PyDict_DelItem(_buffer_info_cache, key); + _buffer_info_free_untagged(info); + info = old_info; + } + else { + /* Insert new info as first item in the linked buffer-info list. */ + info->next = stored_info; + *buffer_info_cache_ptr = buffer_info_tag(info); } - Py_DECREF(key); + return info; } + /* * Retrieving buffers for ndarray */ @@ -779,8 +788,9 @@ array_getbuffer(PyObject *obj, Py_buffer *view, int flags) goto fail; } - /* Fill in information */ - info = _buffer_get_info(obj, flags); + /* Fill in information (and add it to _buffer_info if necessary) */ + info = _buffer_get_info( + &((PyArrayObject_fields *)self)->_buffer_info, obj, flags); if (info == NULL) { goto fail; } @@ -830,90 +840,48 @@ fail: } /* - * Retrieving buffers for scalars + * Retrieving buffers for void scalar (which can contain any complex types), + * defined in buffer.c since it requires the complex format building logic. 
*/ -int +NPY_NO_EXPORT int void_getbuffer(PyObject *self, Py_buffer *view, int flags) { - _buffer_info_t *info = NULL; - PyArray_Descr *descr = NULL; - int elsize; + PyVoidScalarObject *scalar = (PyVoidScalarObject *)self; if (flags & PyBUF_WRITABLE) { PyErr_SetString(PyExc_BufferError, "scalar buffer is readonly"); - goto fail; - } - - /* Fill in information */ - info = _buffer_get_info(self, flags); - if (info == NULL) { - goto fail; - } - - view->ndim = info->ndim; - view->shape = info->shape; - view->strides = info->strides; - - if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { - view->format = info->format; - } else { - view->format = NULL; - } - - descr = PyArray_DescrFromScalar(self); - view->buf = (void *)scalar_value(self, descr); - elsize = descr->elsize; - view->len = elsize; - if (PyArray_IsScalar(self, Datetime) || PyArray_IsScalar(self, Timedelta)) { - elsize = 1; /* descr->elsize,char is 8,'M', but we return 1,'B' */ + return -1; } - view->itemsize = elsize; - - Py_DECREF(descr); + view->ndim = 0; + view->shape = NULL; + view->strides = NULL; + view->suboffsets = NULL; + view->len = scalar->descr->elsize; + view->itemsize = scalar->descr->elsize; view->readonly = 1; view->suboffsets = NULL; - view->obj = self; Py_INCREF(self); - return 0; - -fail: - view->obj = NULL; - return -1; -} - -/* - * NOTE: for backward compatibility (esp. with PyArg_ParseTuple("s#", ...)) - * we do *not* define bf_releasebuffer at all. - * - * Instead, any extra data allocated with the buffer is released only in - * array_dealloc. - * - * Ensuring that the buffer stays in place is taken care by refcounting; - * ndarrays do not reallocate if there are references to them, and a buffer - * view holds one reference. 
- */ - -NPY_NO_EXPORT void -_dealloc_cached_buffer_info(PyObject *self) -{ - int reset_error_state = 0; - PyObject *ptype, *pvalue, *ptraceback; - - /* This function may be called when processing an exception -- - * we need to stash the error state to avoid confusing PyDict - */ + view->obj = self; + view->buf = scalar->obval; - if (PyErr_Occurred()) { - reset_error_state = 1; - PyErr_Fetch(&ptype, &pvalue, &ptraceback); + if (((flags & PyBUF_FORMAT) != PyBUF_FORMAT)) { + /* It is unnecessary to find the correct format */ + view->format = NULL; + return 0; } - _buffer_clear_info(self); - - if (reset_error_state) { - PyErr_Restore(ptype, pvalue, ptraceback); + /* + * If a format is being exported, we need to use _buffer_get_info + * to find the correct format. This format must also be stored, since + * at least in theory it can change (in practice it should never change). + */ + _buffer_info_t *info = _buffer_get_info(&scalar->_buffer_info, self, flags); + if (info == NULL) { + return -1; } + view->format = info->format; + return 0; } diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 061db2250..8ab592015 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1420,7 +1420,7 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args) { PyObject *obj; PyObject *str; - #if (PY_VERSION_HEX >= 0x030700A2) + #if PY_VERSION_HEX >= 0x030700A2 && (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300) const char *docstr; #else char *docstr; diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index f700bdc99..f9dd35a73 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -18,10 +18,13 @@ #include "dtypemeta.h" #include "scalartypes.h" #include "mapping.h" +#include "legacy_dtype_implementation.h" #include "convert_datatype.h" #include "_datetime.h" 
#include "datetime_strings.h" +#include "array_method.h" +#include "usertypes.h" /* @@ -35,6 +38,183 @@ */ NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[] = {0, 3, 5, 10, 10, 20, 20, 20, 20}; + +static PyObject * +PyArray_GetGenericToVoidCastingImpl(void); + +static PyObject * +PyArray_GetVoidToGenericCastingImpl(void); + +static PyObject * +PyArray_GetGenericToObjectCastingImpl(void); + +static PyObject * +PyArray_GetObjectToGenericCastingImpl(void); + + +/** + * Fetch the casting implementation from one DType to another. + * + * @params from + * @params to + * + * @returns A castingimpl (PyArrayDTypeMethod *), None or NULL with an + * error set. + */ +static PyObject * +PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyObject *res = PyDict_GetItem(from->castingimpls, (PyObject *)to); + if (res != NULL || PyErr_Occurred()) { + Py_XINCREF(res); + return res; + } + /* + * The following code looks up CastingImpl based on the fact that anything + * can be cast to and from objects or structured (void) dtypes. + * + * The last part adds casts dynamically based on legacy definition + */ + if (from->type_num == NPY_OBJECT) { + res = PyArray_GetObjectToGenericCastingImpl(); + } + else if (to->type_num == NPY_OBJECT) { + res = PyArray_GetGenericToObjectCastingImpl(); + } + else if (from->type_num == NPY_VOID) { + res = PyArray_GetVoidToGenericCastingImpl(); + } + else if (to->type_num == NPY_VOID) { + res = PyArray_GetGenericToVoidCastingImpl(); + } + else if (from->type_num < NPY_NTYPES && to->type_num < NPY_NTYPES) { + /* All builtin dtypes have their casts explicitly defined. 
*/ + PyErr_Format(PyExc_RuntimeError, + "builtin cast from %S to %s not found, this should not " + "be possible.", from, to); + return NULL; + } + else { + if (from->parametric || to->parametric) { + Py_RETURN_NONE; + } + /* Reject non-legacy dtypes (they need to use the new API) */ + if (!from->legacy || !to->legacy) { + Py_RETURN_NONE; + } + if (from != to) { + /* A cast function must have been registered */ + PyArray_VectorUnaryFunc *castfunc = PyArray_GetCastFunc( + from->singleton, to->type_num); + if (castfunc == NULL) { + PyErr_Clear(); + /* Remember that this cast is not possible */ + if (PyDict_SetItem(from->castingimpls, (PyObject *) to, Py_None) < 0) { + return NULL; + } + Py_RETURN_NONE; + } + } + + /* PyArray_AddLegacyWrapping_CastingImpl find the correct casting level: */ + /* + * TODO: Possibly move this to the cast registration time. But if we do + * that, we have to also update the cast when the casting safety + * is registered. + */ + if (PyArray_AddLegacyWrapping_CastingImpl(from, to, -1) < 0) { + return NULL; + } + return PyArray_GetCastingImpl(from, to); + } + + if (res == NULL) { + return NULL; + } + if (PyDict_SetItem(from->castingimpls, (PyObject *)to, res) < 0) { + Py_DECREF(res); + return NULL; + } + return res; +} + + +/** + * Fetch the (bound) casting implementation from one DType to another. + * + * @params from + * @params to + * + * @returns A bound casting implementation or None (or NULL for error). 
+ */ +static PyObject * +PyArray_GetBoundCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyObject *method = PyArray_GetCastingImpl(from, to); + if (method == NULL || method == Py_None) { + return method; + } + + /* TODO: Create better way to wrap method into bound method */ + PyBoundArrayMethodObject *res; + res = PyObject_New(PyBoundArrayMethodObject, &PyBoundArrayMethod_Type); + if (res == NULL) { + return NULL; + } + res->method = (PyArrayMethodObject *)method; + res->dtypes = PyMem_Malloc(2 * sizeof(PyArray_DTypeMeta *)); + if (res->dtypes == NULL) { + Py_DECREF(res); + return NULL; + } + Py_INCREF(from); + res->dtypes[0] = from; + Py_INCREF(to); + res->dtypes[1] = to; + + return (PyObject *)res; +} + + +NPY_NO_EXPORT PyObject * +_get_castingimpl(PyObject *NPY_UNUSED(module), PyObject *args) +{ + PyArray_DTypeMeta *from, *to; + if (!PyArg_ParseTuple(args, "O!O!:_get_castingimpl", + &PyArrayDTypeMeta_Type, &from, &PyArrayDTypeMeta_Type, &to)) { + return NULL; + } + return PyArray_GetBoundCastingImpl(from, to); +} + + +/** + * Find the minimal cast safety level given two cast-levels as input. + * Supports the NPY_CAST_IS_VIEW check, and should be preferred to allow + * extending cast-levels if necessary. + * It is not valid for one of the arguments to be -1 to indicate an error. + * + * @param casting1 + * @param casting2 + * @return The minimal casting error (can be -1). 
+ */ +NPY_NO_EXPORT NPY_CASTING +PyArray_MinCastSafety(NPY_CASTING casting1, NPY_CASTING casting2) +{ + if (casting1 < 0 || casting2 < 0) { + return -1; + } + NPY_CASTING view = casting1 & casting2 & _NPY_CAST_IS_VIEW; + casting1 = casting1 & ~_NPY_CAST_IS_VIEW; + casting2 = casting2 & ~_NPY_CAST_IS_VIEW; + /* larger casting values are less safe */ + if (casting1 > casting2) { + return casting1 | view; + } + return casting2 | view; +} + + /*NUMPY_API * For backward compatibility * @@ -132,170 +312,6 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) return NULL; } -/* - * Legacy function to find the correct dtype when casting from any built-in - * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic - * units. - * - * This function returns a dtype based on flex_dtype and the values in - * data_dtype. It also calls Py_DECREF on the flex_dtype. If the - * flex_dtype is not flexible, it returns it as-is. - * - * Usually, if data_obj is not an array, dtype should be the result - * given by the PyArray_GetArrayParamsFromObject function. - * - * If *flex_dtype is NULL, returns immediately, without setting an - * exception, leaving any previous error handling intact. 
- */ -NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype) -{ - PyArray_DatetimeMetaData *meta; - PyArray_Descr *retval = NULL; - int flex_type_num; - - if (flex_dtype == NULL) { - return retval; - } - - flex_type_num = flex_dtype->type_num; - - /* Flexible types with expandable size */ - if (PyDataType_ISUNSIZED(flex_dtype)) { - /* First replace the flex_dtype */ - retval = PyArray_DescrNew(flex_dtype); - Py_DECREF(flex_dtype); - if (retval == NULL) { - return retval; - } - - if (data_dtype->type_num == flex_type_num || - flex_type_num == NPY_VOID) { - (retval)->elsize = data_dtype->elsize; - } - else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { - npy_intp size = 8; - - /* - * Get a string-size estimate of the input. These - * are generallly the size needed, rounded up to - * a multiple of eight. - */ - switch (data_dtype->type_num) { - case NPY_BOOL: - case NPY_UBYTE: - case NPY_BYTE: - case NPY_USHORT: - case NPY_SHORT: - case NPY_UINT: - case NPY_INT: - case NPY_ULONG: - case NPY_LONG: - case NPY_ULONGLONG: - case NPY_LONGLONG: - if (data_dtype->kind == 'b') { - /* 5 chars needed for cast to 'True' or 'False' */ - size = 5; - } - else if (data_dtype->elsize > 8 || - data_dtype->elsize < 0) { - /* - * Element size should never be greater than 8 or - * less than 0 for integer type, but just in case... 
- */ - break; - } - else if (data_dtype->kind == 'u') { - size = REQUIRED_STR_LEN[data_dtype->elsize]; - } - else if (data_dtype->kind == 'i') { - /* Add character for sign symbol */ - size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; - } - break; - case NPY_HALF: - case NPY_FLOAT: - case NPY_DOUBLE: - size = 32; - break; - case NPY_LONGDOUBLE: - size = 48; - break; - case NPY_CFLOAT: - case NPY_CDOUBLE: - size = 2 * 32; - break; - case NPY_CLONGDOUBLE: - size = 2 * 48; - break; - case NPY_OBJECT: - size = 64; - break; - case NPY_STRING: - case NPY_VOID: - size = data_dtype->elsize; - break; - case NPY_UNICODE: - size = data_dtype->elsize / 4; - break; - case NPY_DATETIME: - meta = get_datetime_metadata_from_dtype(data_dtype); - if (meta == NULL) { - Py_DECREF(retval); - return NULL; - } - size = get_datetime_iso_8601_strlen(0, meta->base); - break; - case NPY_TIMEDELTA: - size = 21; - break; - } - - if (flex_type_num == NPY_STRING) { - retval->elsize = size; - } - else if (flex_type_num == NPY_UNICODE) { - retval->elsize = size * 4; - } - } - else { - /* - * We should never get here, but just in case someone adds - * a new flex dtype... - */ - PyErr_SetString(PyExc_TypeError, - "don't know how to adapt flex dtype"); - Py_DECREF(retval); - return NULL; - } - } - /* Flexible type with generic time unit that adapts */ - else if (flex_type_num == NPY_DATETIME || - flex_type_num == NPY_TIMEDELTA) { - meta = get_datetime_metadata_from_dtype(flex_dtype); - retval = flex_dtype; - if (meta == NULL) { - return NULL; - } - - if (meta->base == NPY_FR_GENERIC) { - if (data_dtype->type_num == NPY_DATETIME || - data_dtype->type_num == NPY_TIMEDELTA) { - meta = get_datetime_metadata_from_dtype(data_dtype); - if (meta == NULL) { - return NULL; - } - - retval = create_datetime_dtype(flex_type_num, meta); - Py_DECREF(flex_dtype); - } - } - } - else { - retval = flex_dtype; - } - return retval; -} /* * Must be broadcastable. 
@@ -325,42 +341,116 @@ PyArray_CastAnyTo(PyArrayObject *out, PyArrayObject *mp) return PyArray_CopyAnyInto(out, mp); } + +/** + * Given two dtype instances, find the correct casting safety. + * + * Note that in many cases, it may be preferable to fetch the casting + * implementations fully to have them available for doing the actual cast + * later. + * + * @param from + * @param to The descriptor to cast to (may be NULL) + * @param to_dtype If `to` is NULL, must pass the to_dtype (otherwise this + * is ignored). + * @return NPY_CASTING or -1 on error or if the cast is not possible. + */ +NPY_NO_EXPORT NPY_CASTING +PyArray_GetCastSafety( + PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype) +{ + NPY_CASTING casting; + if (to != NULL) { + to_dtype = NPY_DTYPE(to); + } + PyObject *meth = PyArray_GetCastingImpl(NPY_DTYPE(from), to_dtype); + if (meth == NULL) { + return -1; + } + if (meth == Py_None) { + Py_DECREF(Py_None); + return -1; + } + + PyArrayMethodObject *castingimpl = (PyArrayMethodObject *)meth; + + PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(from), to_dtype}; + PyArray_Descr *descrs[2] = {from, to}; + PyArray_Descr *out_descrs[2]; + + casting = castingimpl->resolve_descriptors( + castingimpl, dtypes, descrs, out_descrs); + Py_DECREF(meth); + if (casting < 0) { + return -1; + } + /* The returned descriptors may not match, requiring a second check */ + if (out_descrs[0] != descrs[0]) { + NPY_CASTING from_casting = PyArray_GetCastSafety( + descrs[0], out_descrs[0], NULL); + casting = PyArray_MinCastSafety(casting, from_casting); + if (casting < 0) { + goto finish; + } + } + if (descrs[1] != NULL && out_descrs[1] != descrs[1]) { + NPY_CASTING from_casting = PyArray_GetCastSafety( + descrs[1], out_descrs[1], NULL); + casting = PyArray_MinCastSafety(casting, from_casting); + if (casting < 0) { + goto finish; + } + } + + finish: + Py_DECREF(out_descrs[0]); + Py_DECREF(out_descrs[1]); + /* NPY_NO_CASTING has to be used for 
(NPY_EQUIV_CASTING|_NPY_CAST_IS_VIEW) */ + assert(casting != (NPY_EQUIV_CASTING|_NPY_CAST_IS_VIEW)); + return casting; +} + + /*NUMPY_API *Check the type coercion rules. */ NPY_NO_EXPORT int PyArray_CanCastSafely(int fromtype, int totype) { - PyArray_Descr *from; - - /* Fast table lookup for small type numbers */ - if ((unsigned int)fromtype < NPY_NTYPES && - (unsigned int)totype < NPY_NTYPES) { - return _npy_can_cast_safely_table[fromtype][totype]; +#if NPY_USE_NEW_CASTINGIMPL + PyArray_DTypeMeta *from = PyArray_DTypeFromTypeNum(fromtype); + if (from == NULL) { + PyErr_WriteUnraisable(NULL); + return 0; } - - /* Identity */ - if (fromtype == totype) { - return 1; + PyArray_DTypeMeta *to = PyArray_DTypeFromTypeNum(totype); + if (to == NULL) { + PyErr_WriteUnraisable(NULL); + return 0; } + PyObject *castingimpl = PyArray_GetCastingImpl(from, to); + Py_DECREF(from); + Py_DECREF(to); - from = PyArray_DescrFromType(fromtype); - /* - * cancastto is a NPY_NOTYPE terminated C-int-array of types that - * the data-type can be cast to safely. 
- */ - if (from->f->cancastto) { - int *curtype = from->f->cancastto; - - while (*curtype != NPY_NOTYPE) { - if (*curtype++ == totype) { - return 1; - } - } + if (castingimpl == NULL) { + PyErr_WriteUnraisable(NULL); + return 0; } - return 0; + else if (castingimpl == Py_None) { + Py_DECREF(Py_None); + return 0; + } + NPY_CASTING safety = ((PyArrayMethodObject *)castingimpl)->casting; + int res = PyArray_MinCastSafety(safety, NPY_SAFE_CASTING) == NPY_SAFE_CASTING; + Py_DECREF(castingimpl); + return res; +#else + return PyArray_LegacyCanCastSafely(fromtype, totype); +#endif } + + /*NUMPY_API * leaves reference count alone --- cannot be NULL * @@ -370,117 +460,16 @@ PyArray_CanCastSafely(int fromtype, int totype) NPY_NO_EXPORT npy_bool PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) { - int from_type_num = from->type_num; - int to_type_num = to->type_num; - npy_bool ret; - - ret = (npy_bool) PyArray_CanCastSafely(from_type_num, to_type_num); - if (ret) { - /* Check String and Unicode more closely */ - if (from_type_num == NPY_STRING) { - if (to_type_num == NPY_STRING) { - ret = (from->elsize <= to->elsize); - } - else if (to_type_num == NPY_UNICODE) { - ret = (from->elsize << 2 <= to->elsize); - } - } - else if (from_type_num == NPY_UNICODE) { - if (to_type_num == NPY_UNICODE) { - ret = (from->elsize <= to->elsize); - } - } - /* - * For datetime/timedelta, only treat casts moving towards - * more precision as safe. 
- */ - else if (from_type_num == NPY_DATETIME && to_type_num == NPY_DATETIME) { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - return can_cast_datetime64_metadata(meta1, meta2, - NPY_SAFE_CASTING); - } - else if (from_type_num == NPY_TIMEDELTA && - to_type_num == NPY_TIMEDELTA) { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - return can_cast_timedelta64_metadata(meta1, meta2, - NPY_SAFE_CASTING); - } - /* - * If to_type_num is STRING or unicode - * see if the length is long enough to hold the - * stringified value of the object. - */ - else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { - /* - * Boolean value cast to string type is 5 characters max - * for string 'False'. - */ - int char_size = 1; - if (to_type_num == NPY_UNICODE) { - char_size = 4; - } - - ret = 0; - if (PyDataType_ISUNSIZED(to)) { - ret = 1; - } - /* - * Need at least 5 characters to convert from boolean - * to 'True' or 'False'. 
- */ - else if (from->kind == 'b' && to->elsize >= 5 * char_size) { - ret = 1; - } - else if (from->kind == 'u') { - /* Guard against unexpected integer size */ - if (from->elsize > 8 || from->elsize < 0) { - ret = 0; - } - else if (to->elsize >= - REQUIRED_STR_LEN[from->elsize] * char_size) { - ret = 1; - } - } - else if (from->kind == 'i') { - /* Guard against unexpected integer size */ - if (from->elsize > 8 || from->elsize < 0) { - ret = 0; - } - /* Extra character needed for sign */ - else if (to->elsize >= - (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { - ret = 1; - } - } - } - } - return ret; +#if NPY_USE_NEW_CASTINGIMPL + return PyArray_CanCastTypeTo(from, to, NPY_SAFE_CASTING); +#else + return PyArray_LegacyCanCastTo(from, to); +#endif } + /* Provides an ordering for the dtype 'kind' character codes */ -static int +NPY_NO_EXPORT int dtype_kind_to_ordering(char kind) { switch (kind) { @@ -541,51 +530,6 @@ type_num_unsigned_to_signed(int type_num) } } -/* - * Compare two field dictionaries for castability. - * - * Return 1 if 'field1' can be cast to 'field2' according to the rule - * 'casting', 0 if not. - * - * Castabiliy of field dictionaries is defined recursively: 'field1' and - * 'field2' must have the same field names (possibly in different - * orders), and the corresponding field types must be castable according - * to the given casting rule. 
- */ -static int -can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) -{ - Py_ssize_t ppos; - PyObject *key; - PyObject *tuple1, *tuple2; - - if (field1 == field2) { - return 1; - } - if (field1 == NULL || field2 == NULL) { - return 0; - } - if (PyDict_Size(field1) != PyDict_Size(field2)) { - return 0; - } - - /* Iterate over all the fields and compare for castability */ - ppos = 0; - while (PyDict_Next(field1, &ppos, &key, &tuple1)) { - if ((tuple2 = PyDict_GetItem(field2, key)) == NULL) { - return 0; - } - /* Compare the dtype of the field for castability */ - if (!PyArray_CanCastTypeTo( - (PyArray_Descr *)PyTuple_GET_ITEM(tuple1, 0), - (PyArray_Descr *)PyTuple_GET_ITEM(tuple2, 0), - casting)) { - return 0; - } - } - - return 1; -} /*NUMPY_API * Returns true if data of type 'from' may be cast to data of type @@ -593,224 +537,41 @@ can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) */ NPY_NO_EXPORT npy_bool PyArray_CanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, - NPY_CASTING casting) + NPY_CASTING casting) { +#if NPY_USE_NEW_CASTINGIMPL /* - * Fast paths for equality and for basic types. + * NOTE: This code supports U and S, this is identical to the code + * in `ctors.c` which does not allow these dtypes to be attached + * to an array. Unlike the code for `np.array(..., dtype=)` + * which uses `PyArray_ExtractDTypeAndDescriptor` it rejects "m8" + * as a flexible dtype instance representing a DType. */ - if (from == to || - ((NPY_LIKELY(PyDataType_ISNUMBER(from)) || - PyDataType_ISOBJECT(from)) && - NPY_LIKELY(from->type_num == to->type_num) && - NPY_LIKELY(from->byteorder == to->byteorder))) { - return 1; - } - /* - * Cases with subarrays and fields need special treatment. 
- */ - if (PyDataType_HASFIELDS(from)) { - /* - * If from is a structured data type, then it can be cast to a simple - * non-object one only for unsafe casting *and* if it has a single - * field; recurse just in case the single field is itself structured. - */ - if (!PyDataType_HASFIELDS(to) && !PyDataType_ISOBJECT(to)) { - if (casting == NPY_UNSAFE_CASTING && - PyDict_Size(from->fields) == 1) { - Py_ssize_t ppos = 0; - PyObject *tuple; - PyArray_Descr *field; - PyDict_Next(from->fields, &ppos, NULL, &tuple); - field = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); - /* - * For a subarray, we need to get the underlying type; - * since we already are casting unsafely, we can ignore - * the shape. - */ - if (PyDataType_HASSUBARRAY(field)) { - field = field->subarray->base; - } - return PyArray_CanCastTypeTo(field, to, casting); - } - else { - return 0; - } - } - /* - * Casting from one structured data type to another depends on the fields; - * we pass that case on to the EquivTypenums case below. - * - * TODO: move that part up here? Need to check whether equivalent type - * numbers is an addition constraint that is needed. - * - * TODO/FIXME: For now, always allow structured to structured for unsafe - * casting; this is not correct, but needed since the treatment in can_cast - * below got out of sync with astype; see gh-13667. - */ - if (casting == NPY_UNSAFE_CASTING) { - return 1; - } - } - else if (PyDataType_HASFIELDS(to)) { - /* - * If "from" is a simple data type and "to" has fields, then only - * unsafe casting works (and that works always, even to multiple fields). - */ - return casting == NPY_UNSAFE_CASTING; - } /* - * Everything else we consider castable for unsafe for now. - * FIXME: ensure what we do here is consistent with "astype", - * i.e., deal more correctly with subarrays and user-defined dtype. + * TODO: We should grow support for `np.can_cast("d", "S")` being + * different from `np.can_cast("d", "S0")` here, at least for + * the python side API. 
*/ - else if (casting == NPY_UNSAFE_CASTING) { - return 1; + NPY_CASTING safety; + if (PyDataType_ISUNSIZED(to) && to->subarray == NULL) { + safety = PyArray_GetCastSafety(from, NULL, NPY_DTYPE(to)); } - /* - * Equivalent simple types can be cast with any value of 'casting', but - * we need to be careful about structured to structured. - */ - if (PyArray_EquivTypenums(from->type_num, to->type_num)) { - /* For complicated case, use EquivTypes (for now) */ - if (PyTypeNum_ISUSERDEF(from->type_num) || - from->subarray != NULL) { - int ret; - - /* Only NPY_NO_CASTING prevents byte order conversion */ - if ((casting != NPY_NO_CASTING) && - (!PyArray_ISNBO(from->byteorder) || - !PyArray_ISNBO(to->byteorder))) { - PyArray_Descr *nbo_from, *nbo_to; - - nbo_from = PyArray_DescrNewByteorder(from, NPY_NATIVE); - nbo_to = PyArray_DescrNewByteorder(to, NPY_NATIVE); - if (nbo_from == NULL || nbo_to == NULL) { - Py_XDECREF(nbo_from); - Py_XDECREF(nbo_to); - PyErr_Clear(); - return 0; - } - ret = PyArray_EquivTypes(nbo_from, nbo_to); - Py_DECREF(nbo_from); - Py_DECREF(nbo_to); - } - else { - ret = PyArray_EquivTypes(from, to); - } - return ret; - } - - if (PyDataType_HASFIELDS(from)) { - switch (casting) { - case NPY_EQUIV_CASTING: - case NPY_SAFE_CASTING: - case NPY_SAME_KIND_CASTING: - /* - * `from' and `to' must have the same fields, and - * corresponding fields must be (recursively) castable. 
- */ - return can_cast_fields(from->fields, to->fields, casting); - - case NPY_NO_CASTING: - default: - return PyArray_EquivTypes(from, to); - } - } - - switch (from->type_num) { - case NPY_DATETIME: { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - if (casting == NPY_NO_CASTING) { - return PyArray_ISNBO(from->byteorder) == - PyArray_ISNBO(to->byteorder) && - can_cast_datetime64_metadata(meta1, meta2, casting); - } - else { - return can_cast_datetime64_metadata(meta1, meta2, casting); - } - } - case NPY_TIMEDELTA: { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - if (casting == NPY_NO_CASTING) { - return PyArray_ISNBO(from->byteorder) == - PyArray_ISNBO(to->byteorder) && - can_cast_timedelta64_metadata(meta1, meta2, casting); - } - else { - return can_cast_timedelta64_metadata(meta1, meta2, casting); - } - } - default: - switch (casting) { - case NPY_NO_CASTING: - return PyArray_EquivTypes(from, to); - case NPY_EQUIV_CASTING: - return (from->elsize == to->elsize); - case NPY_SAFE_CASTING: - return (from->elsize <= to->elsize); - default: - return 1; - } - break; - } + else { + safety = PyArray_GetCastSafety(from, to, NPY_DTYPE(to)); } - /* If safe or same-kind casts are allowed */ - else if (casting == NPY_SAFE_CASTING || casting == NPY_SAME_KIND_CASTING) { - if (PyArray_CanCastTo(from, to)) { - return 1; - } - else if(casting == NPY_SAME_KIND_CASTING) { - /* - * Also allow casting from lower to higher kinds, according - * to the ordering provided by dtype_kind_to_ordering. 
- * Some kinds, like datetime, don't fit in the hierarchy, - * and are special cased as -1. - */ - int from_order, to_order; - - from_order = dtype_kind_to_ordering(from->kind); - to_order = dtype_kind_to_ordering(to->kind); - - if (to->kind == 'm') { - /* both types being timedelta is already handled before. */ - int integer_order = dtype_kind_to_ordering('i'); - return (from_order != -1) && (from_order <= integer_order); - } - return (from_order != -1) && (from_order <= to_order); - } - else { - return 0; - } - } - /* NPY_NO_CASTING or NPY_EQUIV_CASTING was specified */ - else { + if (safety < 0) { + PyErr_Clear(); return 0; } + /* If casting is the smaller (or equal) safety we match */ + return PyArray_MinCastSafety(safety, casting) == casting; +#else + return PyArray_LegacyCanCastTypeTo(from, to, casting); +#endif } + /* CanCastArrayTo needs this function */ static int min_scalar_type_num(char *valueptr, int type_num, int *is_small_unsigned); @@ -1035,7 +796,7 @@ ensure_dtype_nbo(PyArray_Descr *type) /** * This function should possibly become public API eventually. At this * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`. - * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement + * We will use `CastingImpl[from, to].resolve_descriptors(...)` to implement * this logic. * Before that, the API needs to be reviewed though. 
* @@ -1067,6 +828,35 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) return descr; } +#if NPY_USE_NEW_CASTINGIMPL + PyObject *tmp = PyArray_GetCastingImpl(NPY_DTYPE(descr), given_DType); + if (tmp == NULL || tmp == Py_None) { + Py_XDECREF(tmp); + goto error; + } + PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(descr), given_DType}; + PyArray_Descr *given_descrs[2] = {descr, NULL}; + PyArray_Descr *loop_descrs[2]; + + PyArrayMethodObject *meth = (PyArrayMethodObject *)tmp; + NPY_CASTING casting = meth->resolve_descriptors( + meth, dtypes, given_descrs, loop_descrs); + Py_DECREF(tmp); + if (casting < 0) { + goto error; + } + Py_DECREF(loop_descrs[0]); + return loop_descrs[1]; + + error:; /* (; due to compiler limitations) */ + PyObject *err_type = NULL, *err_value = NULL, *err_traceback = NULL; + PyErr_Fetch(&err_type, &err_value, &err_traceback); + PyErr_Format(PyExc_ValueError, + "cannot cast dtype %S to %S.", descr, given_DType); + npy_PyErr_ChainExceptions(err_type, err_value, err_traceback); + return NULL; + +#else /* NPY_USE_NEW_CASTS */ if (!given_DType->legacy) { PyErr_SetString(PyExc_NotImplementedError, "Must use casting to find the correct DType for a parametric " @@ -1077,6 +867,7 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) PyArray_Descr *flex_dtype = PyArray_DescrNew(given_DType->singleton); return PyArray_AdaptFlexibleDType(descr, flex_dtype); +#endif /* NPY_USE_NEW_CASTS */ } @@ -2007,3 +1798,1108 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn) PyDataMem_FREE(mps); return NULL; } + + +/** + * Private function to add a casting implementation by unwrapping a bound + * array method. + * + * @param meth + * @return 0 on success -1 on failure. 
+ */ +NPY_NO_EXPORT int +PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth) +{ + if (meth->method->nin != 1 || meth->method->nout != 1) { + PyErr_SetString(PyExc_TypeError, + "A cast must have one input and one output."); + return -1; + } + if (meth->dtypes[0] == meth->dtypes[1]) { + if (!(meth->method->flags & NPY_METH_SUPPORTS_UNALIGNED)) { + PyErr_Format(PyExc_TypeError, + "A cast where input and output DType (class) are identical " + "must currently support unaligned data. (method: %s)", + meth->method->name); + return -1; + } + if ((meth->method->casting & ~_NPY_CAST_IS_VIEW) != NPY_NO_CASTING) { + PyErr_Format(PyExc_TypeError, + "A cast where input and output DType (class) are identical " + "must signal `no-casting`. (method: %s)", + meth->method->name); + return -1; + } + } + if (PyDict_Contains(meth->dtypes[0]->castingimpls, + (PyObject *)meth->dtypes[1])) { + PyErr_Format(PyExc_RuntimeError, + "A cast was already added for %S -> %S. (method: %s)", + meth->dtypes[0], meth->dtypes[1], meth->method->name); + return -1; + } + if (PyDict_SetItem(meth->dtypes[0]->castingimpls, + (PyObject *)meth->dtypes[1], (PyObject *)meth->method) < 0) { + return -1; + } + return 0; +} + +/** + * Add a new casting implementation using a PyArrayMethod_Spec. + * + * @param spec + * @param private If private, allow slots not publically exposed. 
+ * @return 0 on success -1 on failure + */ +NPY_NO_EXPORT int +PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private) +{ + /* Create a bound method, unbind and store it */ + PyBoundArrayMethodObject *meth = PyArrayMethod_FromSpec_int(spec, private); + if (meth == NULL) { + return -1; + } + int res = PyArray_AddCastingImplmentation(meth); + Py_DECREF(meth); + if (res < 0) { + return -1; + } + return 0; +} + + +NPY_NO_EXPORT NPY_CASTING +legacy_same_dtype_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(loop_descrs[0]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + /* this function only makes sense for non-flexible legacy dtypes: */ + assert(loop_descrs[0]->elsize == loop_descrs[1]->elsize); + + /* + * Legacy dtypes (except datetime) only have byte-order and elsize as + * storage parameters. + */ + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + return NPY_EQUIV_CASTING; +} + + +/* + * Simple dtype resolver for casting between two different (non-parametric) + * (legacy) dtypes. 
+ */ +NPY_NO_EXPORT NPY_CASTING +simple_cast_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + assert(dtypes[0]->legacy && dtypes[1]->legacy); + + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + if (given_descrs[1] != NULL) { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + } + + if (self->casting != NPY_NO_CASTING) { + return self->casting; + } + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + return NPY_EQUIV_CASTING; +} + + +static int +add_numeric_cast(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyType_Slot slots[6]; + PyArray_DTypeMeta *dtypes[2] = {from, to}; + PyArrayMethod_Spec spec = { + .name = "numeric_cast", + .nin = 1, + .nout = 1, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .slots = slots, + .dtypes = dtypes, + }; + + npy_intp from_itemsize = dtypes[0]->singleton->elsize; + npy_intp to_itemsize = dtypes[1]->singleton->elsize; + + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &simple_cast_resolve_descriptors; + /* Fetch the optimized loops (2<<10 is a non-contiguous stride) */ + slots[1].slot = NPY_METH_strided_loop; + slots[1].pfunc = PyArray_GetStridedNumericCastFn( + 1, 2<<10, 2<<10, from->type_num, to->type_num); + slots[2].slot = NPY_METH_contiguous_loop; + slots[2].pfunc = PyArray_GetStridedNumericCastFn( + 1, from_itemsize, to_itemsize, from->type_num, to->type_num); + slots[3].slot = NPY_METH_unaligned_strided_loop; + slots[3].pfunc = PyArray_GetStridedNumericCastFn( + 0, 2<<10, 2<<10, from->type_num, to->type_num); + slots[4].slot = NPY_METH_unaligned_contiguous_loop; + slots[4].pfunc = 
PyArray_GetStridedNumericCastFn( + 0, from_itemsize, to_itemsize, from->type_num, to->type_num); + slots[5].slot = 0; + slots[5].pfunc = NULL; + + assert(slots[1].pfunc && slots[2].pfunc && slots[3].pfunc && slots[4].pfunc); + + /* Find the correct casting level, and special case no-cast */ + if (dtypes[0]->kind == dtypes[1]->kind && from_itemsize == to_itemsize) { + spec.casting = NPY_NO_CASTING; + + /* When there is no casting (equivalent C-types) use byteswap loops */ + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &legacy_same_dtype_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "numeric_copy_or_byteswap"; + spec.flags |= NPY_METH_NO_FLOATINGPOINT_ERRORS; + } + else if (_npy_can_cast_safely_table[from->type_num][to->type_num]) { + spec.casting = NPY_SAFE_CASTING; + } + else if (dtype_kind_to_ordering(dtypes[0]->kind) <= + dtype_kind_to_ordering(dtypes[1]->kind)) { + spec.casting = NPY_SAME_KIND_CASTING; + } + else { + spec.casting = NPY_UNSAFE_CASTING; + } + + /* Create a bound method, unbind and store it */ + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); +} + + +/* + * This registers the castingimpl for all casts between numeric types. + * Eventually, this function should likely be defined as part of a .c.src + * file to remove `PyArray_GetStridedNumericCastFn` entirely. 
+ */ +static int +PyArray_InitializeNumericCasts(void) +{ + for (int from = 0; from < NPY_NTYPES; from++) { + if (!PyTypeNum_ISNUMBER(from) && from != NPY_BOOL) { + continue; + } + PyArray_DTypeMeta *from_dt = PyArray_DTypeFromTypeNum(from); + + for (int to = 0; to < NPY_NTYPES; to++) { + if (!PyTypeNum_ISNUMBER(to) && to != NPY_BOOL) { + continue; + } + PyArray_DTypeMeta *to_dt = PyArray_DTypeFromTypeNum(to); + int res = add_numeric_cast(from_dt, to_dt); + Py_DECREF(to_dt); + if (res < 0) { + Py_DECREF(from_dt); + return -1; + } + } + } + return 0; +} + + +static int +cast_to_string_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + /* + * NOTE: The following code used to be part of PyArray_AdaptFlexibleDType + * + * Get a string-size estimate of the input. These + * are generallly the size needed, rounded up to + * a multiple of eight. + */ + npy_intp size = -1; + switch (dtypes[0]->type_num) { + case NPY_BOOL: + case NPY_UBYTE: + case NPY_BYTE: + case NPY_USHORT: + case NPY_SHORT: + case NPY_UINT: + case NPY_INT: + case NPY_ULONG: + case NPY_LONG: + case NPY_ULONGLONG: + case NPY_LONGLONG: + assert(dtypes[0]->singleton->elsize <= 8); + assert(dtypes[0]->singleton->elsize > 0); + if (dtypes[0]->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (dtypes[0]->kind == 'u') { + size = REQUIRED_STR_LEN[dtypes[0]->singleton->elsize]; + } + else if (dtypes[0]->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[dtypes[0]->singleton->elsize] + 1; + } + break; + case NPY_HALF: + case NPY_FLOAT: + case NPY_DOUBLE: + size = 32; + break; + case NPY_LONGDOUBLE: + size = 48; + break; + case NPY_CFLOAT: + case NPY_CDOUBLE: + size = 2 * 32; + break; + case NPY_CLONGDOUBLE: + size = 2 * 48; + break; + case NPY_STRING: + case NPY_VOID: + size = given_descrs[0]->elsize; + break; + case NPY_UNICODE: + size = 
given_descrs[0]->elsize / 4; + break; + default: + PyErr_SetString(PyExc_SystemError, + "Impossible cast to string path requested."); + return -1; + } + if (dtypes[1]->type_num == NPY_UNICODE) { + size *= 4; + } + + if (given_descrs[1] == NULL) { + loop_descrs[1] = PyArray_DescrNewFromType(dtypes[1]->type_num); + if (loop_descrs[1] == NULL) { + return -1; + } + loop_descrs[1]->elsize = size; + } + else { + /* The legacy loop can handle mismatching itemsizes */ + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + + /* Set the input one as well (late for easier error management) */ + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + + if (self->casting == NPY_UNSAFE_CASTING) { + assert(dtypes[0]->type_num == NPY_UNICODE && + dtypes[1]->type_num == NPY_STRING); + return NPY_UNSAFE_CASTING; + } + assert(self->casting == NPY_SAFE_CASTING); + + if (loop_descrs[1]->elsize >= size) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; +} + + +static int +add_other_to_and_from_string_cast( + PyArray_DTypeMeta *string, PyArray_DTypeMeta *other) +{ + if (string == other) { + return 0; + } + + /* Casting from string, is always a simple legacy-style cast */ + if (other->type_num != NPY_STRING && other->type_num != NPY_UNICODE) { + if (PyArray_AddLegacyWrapping_CastingImpl( + string, other, NPY_UNSAFE_CASTING) < 0) { + return -1; + } + } + /* + * Casting to strings, is almost the same, but requires a custom resolver + * to define the correct string length. Right now we use a generic function + * for this. 
+ */ + PyArray_DTypeMeta *dtypes[2] = {other, string}; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &cast_to_string_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "legacy_cast_to_string", + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI, + .dtypes = dtypes, + .slots = slots, + }; + /* Almost everything can be safely cast to string (except unicode) */ + if (other->type_num != NPY_UNICODE) { + spec.casting = NPY_SAFE_CASTING; + } + else { + spec.casting = NPY_UNSAFE_CASTING; + } + + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); +} + + +NPY_NO_EXPORT NPY_CASTING +string_to_string_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(loop_descrs[0]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + if (loop_descrs[0]->elsize == loop_descrs[1]->elsize) { + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + else { + return NPY_EQUIV_CASTING; + } + } + else if (loop_descrs[0]->elsize <= loop_descrs[1]->elsize) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; +} + + +/* + * Add string casts. Right now all string casts are just legacy-wrapped ones + * (except string<->string and unicode<->unicode), but they do require + * custom type resolution for the string length. + * + * A bit like `object`, it could make sense to define a simpler protocol for + * string casts, however, we also need to remember that the itemsize of the + * output has to be found. 
+ */ +static int +PyArray_InitializeStringCasts(void) +{ + int result = -1; + PyArray_DTypeMeta *string = PyArray_DTypeFromTypeNum(NPY_STRING); + PyArray_DTypeMeta *unicode = PyArray_DTypeFromTypeNum(NPY_UNICODE); + PyArray_DTypeMeta *other_dt = NULL; + + /* Add most casts as legacy ones */ + for (int other = 0; other < NPY_NTYPES; other++) { + if (PyTypeNum_ISDATETIME(other) || other == NPY_VOID || + other == NPY_OBJECT) { + continue; + } + other_dt = PyArray_DTypeFromTypeNum(other); + + /* The functions skip string == other_dt or unicode == other_dt */ + if (add_other_to_and_from_string_cast(string, other_dt) < 0) { + goto finish; + } + if (add_other_to_and_from_string_cast(unicode, other_dt) < 0) { + goto finish; + } + + Py_SETREF(other_dt, NULL); + } + + /* string<->string and unicode<->unicode have their own specialized casts */ + PyArray_DTypeMeta *dtypes[2]; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &string_to_string_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "string_to_string_cast", + .casting = NPY_NO_CASTING, + .nin = 1, + .nout = 1, + .flags = (NPY_METH_REQUIRES_PYAPI | + NPY_METH_NO_FLOATINGPOINT_ERRORS | + NPY_METH_SUPPORTS_UNALIGNED), + .dtypes = dtypes, + .slots = slots, + }; + + dtypes[0] = string; + dtypes[1] = string; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto finish; + } + + dtypes[0] = unicode; + dtypes[1] = unicode; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto finish; + } + + result = 0; + finish: + Py_DECREF(string); + Py_DECREF(unicode); + Py_XDECREF(other_dt); + return result; +} + + +/* + * Small helper function to handle the case of `arr.astype(dtype="V")`. + * When the output descriptor is not passed, we always use `V<itemsize>` + * of the other dtype. 
+ */ +static NPY_CASTING +cast_to_void_dtype_class( + PyArray_Descr **given_descrs, PyArray_Descr **loop_descrs) +{ + /* `dtype="V"` means unstructured currently (compare final path) */ + loop_descrs[1] = PyArray_DescrNewFromType(NPY_VOID); + if (loop_descrs[1] == NULL) { + return -1; + } + loop_descrs[1]->elsize = given_descrs[0]->elsize; + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW; +} + + +static NPY_CASTING +nonstructured_to_structured_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + NPY_CASTING casting; + + if (given_descrs[1] == NULL) { + return cast_to_void_dtype_class(given_descrs, loop_descrs); + } + + if (given_descrs[1]->subarray != NULL) { + /* + * We currently consider this at most a safe cast. It would be + * possible to allow a view if the field has exactly one element. + */ + casting = NPY_SAFE_CASTING; + /* Subarray dtype */ + NPY_CASTING base_casting = PyArray_GetCastSafety( + given_descrs[0], given_descrs[1]->subarray->base, NULL); + if (base_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, base_casting); + } + else if (given_descrs[1]->names != NULL) { + /* Structured dtype */ + if (PyTuple_Size(given_descrs[1]->names) == 0) { + /* TODO: This retained behaviour, but likely should be changed. 
*/ + casting = NPY_UNSAFE_CASTING; + } + else { + /* Considered at most unsafe casting (but this could be changed) */ + casting = NPY_UNSAFE_CASTING; + if (PyTuple_Size(given_descrs[1]->names) == 1) { + /* A view may be acceptable */ + casting |= _NPY_CAST_IS_VIEW; + } + + Py_ssize_t pos = 0; + PyObject *key, *tuple; + while (PyDict_Next(given_descrs[1]->fields, &pos, &key, &tuple)) { + PyArray_Descr *field_descr = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); + NPY_CASTING field_casting = PyArray_GetCastSafety( + given_descrs[0], field_descr, NULL); + casting = PyArray_MinCastSafety(casting, field_casting); + if (casting < 0) { + return -1; + } + } + } + } + else { + /* Plain void type. This behaves much like a "view" */ + if (given_descrs[0]->elsize == given_descrs[1]->elsize && + !PyDataType_REFCHK(given_descrs[0])) { + /* + * A simple view, at the moment considered "safe" (the refcheck is + * probably not necessary, but more future proof) + */ + casting = NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW; + } + else if (given_descrs[0]->elsize <= given_descrs[1]->elsize) { + casting = NPY_SAFE_CASTING; + } + else { + casting = NPY_UNSAFE_CASTING; + } + } + + /* Void dtypes always do the full cast. 
*/ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + + return casting; +} + + +int give_bad_field_error(PyObject *key) +{ + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_RuntimeError, + "Invalid or missing field %R, this should be impossible " + "and indicates a NumPy bug.", key); + } + return -1; +} + + +static PyObject * +PyArray_GetGenericToVoidCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "any_to_void_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_SAFE_CASTING; + method->resolve_descriptors = &nonstructured_to_structured_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +static NPY_CASTING +structured_to_nonstructured_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + PyArray_Descr *base_descr; + + if (given_descrs[0]->subarray != NULL) { + base_descr = given_descrs[0]->subarray->base; + } + else if (given_descrs[0]->names != NULL) { + if (PyTuple_Size(given_descrs[0]->names) != 1) { + /* Only allow casting a single field */ + return -1; + } + PyObject *key = PyTuple_GetItem(given_descrs[0]->names, 0); + PyObject *base_tup = PyDict_GetItem(given_descrs[0]->fields, key); + base_descr = (PyArray_Descr *)PyTuple_GET_ITEM(base_tup, 0); + } + else { + /* + * unstructured voids are considered unsafe casts and defined, albeit, + * at this time they go back to legacy behaviour using getitem/setitem. 
+ */ + base_descr = NULL; + } + + /* + * The cast is always considered unsafe, so the PyArray_GetCastSafety + * result currently does not matter. + */ + if (base_descr != NULL && PyArray_GetCastSafety( + base_descr, given_descrs[1], dtypes[1]) < 0) { + return -1; + } + + /* Void dtypes always do the full cast. */ + if (given_descrs[1] == NULL) { + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + /* + * Special case strings here, it should be useless (and only actually + * work for empty arrays). Possibly this should simply raise for + * all parametric DTypes. + */ + if (dtypes[1]->type_num == NPY_STRING) { + loop_descrs[1]->elsize = given_descrs[0]->elsize; + } + else if (dtypes[1]->type_num == NPY_UNICODE) { + loop_descrs[1]->elsize = given_descrs[0]->elsize * 4; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + return NPY_UNSAFE_CASTING; +} + + +static PyObject * +PyArray_GetVoidToGenericCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "void_to_any_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_UNSAFE_CASTING; + method->resolve_descriptors = &structured_to_nonstructured_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +/* + * Find the correct field casting safety. See the TODO note below, including + * in 1.20 (and later) this was based on field names rather than field order + * which it should be using. + * + * NOTE: In theory it would be possible to cache the all the field casting + * implementations on the dtype, to avoid duplicate work. 
+ */ +static NPY_CASTING +can_cast_fields_safety(PyArray_Descr *from, PyArray_Descr *to) +{ + NPY_CASTING casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + + Py_ssize_t field_count = PyTuple_Size(from->names); + if (field_count != PyTuple_Size(to->names)) { + /* TODO: This should be rejected! */ + return NPY_UNSAFE_CASTING; + } + for (Py_ssize_t i = 0; i < field_count; i++) { + PyObject *from_key = PyTuple_GET_ITEM(from->names, i); + PyObject *from_tup = PyDict_GetItemWithError(from->fields, from_key); + if (from_tup == NULL) { + return give_bad_field_error(from_key); + } + PyArray_Descr *from_base = (PyArray_Descr*)PyTuple_GET_ITEM(from_tup, 0); + + /* + * TODO: This should use to_key (order), compare gh-15509 + * by Allan Haldane. And raise an error on failure. + * (Fixing that may also require fixing/changing promotion.) + */ + PyObject *to_tup = PyDict_GetItem(to->fields, from_key); + if (to_tup == NULL) { + return NPY_UNSAFE_CASTING; + } + PyArray_Descr *to_base = (PyArray_Descr*)PyTuple_GET_ITEM(to_tup, 0); + + NPY_CASTING field_casting = PyArray_GetCastSafety(from_base, to_base, NULL); + if (field_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, field_casting); + } + if (!(casting & _NPY_CAST_IS_VIEW)) { + assert((casting & ~_NPY_CAST_IS_VIEW) != NPY_NO_CASTING); + return casting; + } + + /* + * If the itemsize (includes padding at the end), fields, or names + * do not match, this cannot be a view and also not a "no" cast + * (identical dtypes). + * It may be possible that this can be relaxed in some cases. + */ + if (from->elsize != to->elsize) { + /* + * The itemsize may mismatch even if all fields and formats match + * (due to additional padding). 
+ */ + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + + int cmp = PyObject_RichCompareBool(from->fields, to->fields, Py_EQ); + if (cmp != 1) { + if (cmp == -1) { + PyErr_Clear(); + } + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + cmp = PyObject_RichCompareBool(from->names, to->names, Py_EQ); + if (cmp != 1) { + if (cmp == -1) { + PyErr_Clear(); + } + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + return casting; +} + + +static NPY_CASTING +void_to_void_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + NPY_CASTING casting; + + if (given_descrs[1] == NULL) { + /* This is weird, since it doesn't return the original descr, but... */ + return cast_to_void_dtype_class(given_descrs, loop_descrs); + } + + if (given_descrs[0]->names != NULL && given_descrs[1]->names != NULL) { + /* From structured to structured, need to check fields */ + casting = can_cast_fields_safety(given_descrs[0], given_descrs[1]); + } + else if (given_descrs[0]->names != NULL) { + return structured_to_nonstructured_resolve_descriptors( + self, dtypes, given_descrs, loop_descrs); + } + else if (given_descrs[1]->names != NULL) { + return nonstructured_to_structured_resolve_descriptors( + self, dtypes, given_descrs, loop_descrs); + } + else if (given_descrs[0]->subarray == NULL && + given_descrs[1]->subarray == NULL) { + /* Both are plain void dtypes */ + if (given_descrs[0]->elsize == given_descrs[1]->elsize) { + casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + else if (given_descrs[0]->elsize < given_descrs[1]->elsize) { + casting = NPY_SAFE_CASTING; + } + else { + casting = NPY_SAME_KIND_CASTING; + } + } + else { + /* + * At this point, one of the dtypes must be a subarray dtype, the + * other is definitely not a structured one. 
+ */ + PyArray_ArrayDescr *from_sub = given_descrs[0]->subarray; + PyArray_ArrayDescr *to_sub = given_descrs[1]->subarray; + assert(from_sub || to_sub); + + /* If the shapes do not match, this is at most an unsafe cast */ + casting = NPY_UNSAFE_CASTING; + if (from_sub && to_sub) { + int res = PyObject_RichCompareBool(from_sub->shape, to_sub->shape, Py_EQ); + if (res < 0) { + return -1; + } + else if (res) { + /* Both are subarrays and the shape matches */ + casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + } + NPY_CASTING field_casting = PyArray_GetCastSafety( + given_descrs[0]->subarray->base, given_descrs[1]->subarray->base, NULL); + if (field_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, field_casting); + } + + /* Void dtypes always do the full cast. */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + + return casting; +} + + +/* + * This initializes the void to void cast. Voids include structured dtypes, + * which means that they can cast from and to any other dtype and, in that + * sense, are special (similar to Object). + */ +static int +PyArray_InitializeVoidToVoidCast(void) +{ + PyArray_DTypeMeta *Void = PyArray_DTypeFromTypeNum(NPY_VOID); + PyArray_DTypeMeta *dtypes[2] = {Void, Void}; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &void_to_void_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "void_to_void_cast", + .casting = NPY_NO_CASTING, + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_DECREF(Void); + return res; +} + + +/* + * Implement object to any casting implementation. 
Casting from object may + * require inspecting of all array elements (for parametric dtypes), and + * the resolver will thus reject all parametric dtypes if the out dtype + * is not provided. + */ +static NPY_CASTING +object_to_any_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + if (given_descrs[1] == NULL) { + /* + * This should not really be called, since object -> parametric casts + * require inspecting the object array. Allow legacy ones, the path + * here is that e.g. "M8" input is considered to be the DType class, + * and by allowing it here, we go back to the "M8" instance. + */ + if (dtypes[1]->parametric) { + PyErr_Format(PyExc_TypeError, + "casting from object to the parametric DType %S requires " + "the specified output dtype instance. " + "This may be a NumPy issue, since the correct instance " + "should be discovered automatically, however.", dtypes[1]); + return -1; + } + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_UNSAFE_CASTING; +} + + +/* + * Casting to object is special since it is generic to all input dtypes. 
+ */ +static PyObject * +PyArray_GetObjectToGenericCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->nin = 1; + method->nout = 1; + method->name = "object_to_any_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_UNSAFE_CASTING; + method->resolve_descriptors = &object_to_any_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + + +/* Any to object is simple (could even use the default) */ +static NPY_CASTING +any_to_object_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + if (given_descrs[1] == NULL) { + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_SAFE_CASTING; +} + + +/* + * Casting to object is special since it is generic to all input dtypes. 
+ */ +static PyObject * +PyArray_GetGenericToObjectCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->nin = 1; + method->nout = 1; + method->name = "any_to_object_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_SAFE_CASTING; + method->resolve_descriptors = &any_to_object_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +static int +PyArray_InitializeObjectToObjectCast(void) +{ + /* + * The object dtype does not support byte order changes, so its cast + * is always a direct view. + */ + PyArray_DTypeMeta *Object = PyArray_DTypeFromTypeNum(NPY_OBJECT); + PyArray_DTypeMeta *dtypes[2] = {Object, Object}; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "object_to_object_cast", + .casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW, + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_DECREF(Object); + return res; +} + + +NPY_NO_EXPORT int +PyArray_InitializeCasts() +{ + if (PyArray_InitializeNumericCasts() < 0) { + return -1; + } + if (PyArray_InitializeStringCasts() < 0) { + return -1; + } + if (PyArray_InitializeVoidToVoidCast() < 0) { + return -1; + } + if (PyArray_InitializeObjectToObjectCast() < 0) { + return -1; + } + /* Datetime casts are defined in datetime.c */ + if (PyArray_InitializeDatetimeCasts() < 0) { + return -1; + } + return 0; +} diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index a2b36b497..cc1930f77 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ 
b/numpy/core/src/multiarray/convert_datatype.h @@ -1,6 +1,13 @@ #ifndef _NPY_ARRAY_CONVERT_DATATYPE_H_ #define _NPY_ARRAY_CONVERT_DATATYPE_H_ +#include "array_method.h" + +extern NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[]; + +NPY_NO_EXPORT PyObject * +_get_castingimpl(PyObject *NPY_UNUSED(module), PyObject *args); + NPY_NO_EXPORT PyArray_VectorUnaryFunc * PyArray_GetCastFunc(PyArray_Descr *descr, int type_num); @@ -16,6 +23,9 @@ PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2); NPY_NO_EXPORT int PyArray_ValidType(int type); +NPY_NO_EXPORT int +dtype_kind_to_ordering(char kind); + /* Like PyArray_CanCastArrayTo */ NPY_NO_EXPORT npy_bool can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data, @@ -36,26 +46,37 @@ npy_set_invalid_cast_error( PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, NPY_CASTING casting, npy_bool scalar); -/* - * This function calls Py_DECREF on flex_dtype, and replaces it with - * a new dtype that has been adapted based on the values in data_dtype - * and data_obj. If the flex_dtype is not flexible, it returns it as-is. - * - * Usually, if data_obj is not an array, dtype should be the result - * given by the PyArray_GetArrayParamsFromObject function. - * - * The data_obj may be NULL if just a dtype is known for the source. - * - * If *flex_dtype is NULL, returns immediately, without setting an - * exception, leaving any previous error handling intact. - * - * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, - * and NPY_DATETIME with generic units. 
- */ -NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); - NPY_NO_EXPORT PyArray_Descr * PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType); +NPY_NO_EXPORT int +PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth); + +NPY_NO_EXPORT int +PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private); + +NPY_NO_EXPORT NPY_CASTING +PyArray_MinCastSafety(NPY_CASTING casting1, NPY_CASTING casting2); + +NPY_NO_EXPORT NPY_CASTING +PyArray_GetCastSafety( + PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype); + +NPY_NO_EXPORT NPY_CASTING +legacy_same_dtype_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs); + +NPY_NO_EXPORT NPY_CASTING +simple_cast_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **input_descrs, + PyArray_Descr **loop_descrs); + +NPY_NO_EXPORT int +PyArray_InitializeCasts(void); + #endif diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 2426076b9..f6031e370 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -756,9 +756,11 @@ PyArray_NewFromDescr_int( Py_DECREF(descr); return NULL; } + fa->_buffer_info = NULL; fa->nd = nd; fa->dimensions = NULL; fa->data = NULL; + if (data == NULL) { fa->flags = NPY_ARRAY_DEFAULT; if (flags) { diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 4afc45fb6..9c1b606bb 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -25,6 +25,9 @@ #include "_datetime.h" #include "datetime_strings.h" #include "convert_datatype.h" +#include "array_method.h" +#include "dtypemeta.h" +#include "usertypes.h" /* * Computes the python `ret, d = divmod(d, unit)`. 
@@ -3725,3 +3728,375 @@ find_object_datetime_type(PyObject *obj, int type_num) return NULL; } } + + + + +/* + * Describes casting within datetimes or timedelta + */ +static NPY_CASTING +time_to_time_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + /* This is a within-dtype cast, which currently must handle byteswapping */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[0]); + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + int is_timedelta = given_descrs[0]->type_num == NPY_TIMEDELTA; + + if (given_descrs[0] == given_descrs[1]) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + + NPY_CASTING byteorder_may_allow_view = 0; + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + byteorder_may_allow_view = _NPY_CAST_IS_VIEW; + } + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(loop_descrs[0]); + assert(meta1 != NULL); + meta2 = get_datetime_metadata_from_dtype(loop_descrs[1]); + assert(meta2 != NULL); + + if (meta1->base == meta2->base && meta1->num == meta2->num) { + if (byteorder_may_allow_view) { + return NPY_NO_CASTING | byteorder_may_allow_view; + } + return NPY_EQUIV_CASTING; + } + else if (meta1->base == NPY_FR_GENERIC) { + return NPY_SAFE_CASTING | byteorder_may_allow_view; + } + else if (meta2->base == NPY_FR_GENERIC) { + /* TODO: This is actually an invalid cast (casting will error) */ + return NPY_UNSAFE_CASTING; + } + else if (is_timedelta && ( + /* jump between time units and date units is unsafe for timedelta */ + (meta1->base <= NPY_FR_M && meta2->base > NPY_FR_M) || + (meta1->base > NPY_FR_M && meta2->base <= NPY_FR_M))) { + return NPY_UNSAFE_CASTING; + } + else if (meta1->base <= meta2->base) { + /* Casting to a 
more precise unit is currently considered safe */ + if (datetime_metadata_divides(meta1, meta2, is_timedelta)) { + /* If it divides, we consider it to be a safe cast */ + return NPY_SAFE_CASTING; + } + else { + return NPY_SAME_KIND_CASTING; + } + } + return NPY_SAME_KIND_CASTING; +} + + +/* Handles datetime<->timedelta type resolution (both directions) */ +static NPY_CASTING +datetime_to_timedelta_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + if (given_descrs[1] == NULL) { + PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(given_descrs[0]); + assert(meta != NULL); + loop_descrs[1] = create_datetime_dtype(dtypes[1]->type_num, meta); + } + else { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + } + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + /* + * Mostly NPY_UNSAFE_CASTING is not true, the cast will fail. + * TODO: Once ufuncs use dtype specific promotion rules, + * this is likely unnecessary + */ + return NPY_UNSAFE_CASTING; +} + + +/* In the current setup both strings and unicode casts support all outputs */ +static NPY_CASTING +time_to_string_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (given_descrs[1] != NULL) { + /* + * At the time of writing, NumPy does not check the length here, + * but will error if filling fails. 
+ */ + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + else { + /* Find the correct string length, possibly based on the unit */ + int size; + if (given_descrs[0]->type_num == NPY_DATETIME) { + PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(given_descrs[0]); + assert(meta != NULL); + size = get_datetime_iso_8601_strlen(0, meta->base); + } + else { + size = 21; + } + if (dtypes[1]->type_num == NPY_UNICODE) { + size *= 4; + } + loop_descrs[1] = PyArray_DescrNewFromType(dtypes[1]->type_num); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + loop_descrs[1]->elsize = size; + } + assert(self->casting == NPY_UNSAFE_CASTING); + return NPY_UNSAFE_CASTING; +} + + +static NPY_CASTING +string_to_datetime_cast_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + /* We currently support byte-swapping, so any (unicode) string is OK */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + /* NOTE: This doesn't actually work, and will error during the cast */ + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + return NPY_UNSAFE_CASTING; +} + + +/* + * This registers the castingimpl for all datetime related casts. 
+ */ +NPY_NO_EXPORT int +PyArray_InitializeDatetimeCasts() +{ + int result = -1; + + PyType_Slot slots[3]; + PyArray_DTypeMeta *dtypes[2]; + PyArrayMethod_Spec spec = { + .name = "datetime_casts", + .nin = 1, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .slots = slots, + .dtypes = dtypes, + }; + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &time_to_time_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + PyArray_DTypeMeta *datetime = PyArray_DTypeFromTypeNum(NPY_DATETIME); + PyArray_DTypeMeta *timedelta = PyArray_DTypeFromTypeNum(NPY_TIMEDELTA); + PyArray_DTypeMeta *string = PyArray_DTypeFromTypeNum(NPY_STRING); + PyArray_DTypeMeta *unicode = PyArray_DTypeFromTypeNum(NPY_UNICODE); + PyArray_DTypeMeta *tmp = NULL; + + dtypes[0] = datetime; + dtypes[1] = datetime; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + dtypes[0] = timedelta; + dtypes[1] = timedelta; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + /* + * Casting between timedelta and datetime uses legacy casting loops, but + * custom dtype resolution (to handle copying of the time unit). + */ + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &datetime_to_timedelta_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "timedelta_and_datetime_cast"; + dtypes[0] = timedelta; + dtypes[1] = datetime; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + spec.name = "datetime_to_timedelta_cast"; + dtypes[0] = datetime; + dtypes[1] = timedelta; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + /* + * Cast from numeric types to times. These use the cast functions + * as stored on the datatype, which should be replaced at some point. 
+ * Some of these casts can fail (casting to unitless datetime), but these + * are rather special. + */ + for (int num = 0; num < NPY_NTYPES; num++) { + if (!PyTypeNum_ISNUMBER(num) && num != NPY_BOOL) { + continue; + } + + Py_XSETREF(tmp, PyArray_DTypeFromTypeNum(num)); + + if (PyArray_AddLegacyWrapping_CastingImpl( + tmp, datetime, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + datetime, tmp, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + + NPY_CASTING to_timedelta_casting = NPY_UNSAFE_CASTING; + if (PyTypeNum_ISINTEGER(num) || num == NPY_BOOL) { + /* timedelta casts like int64 right now... */ + if (PyTypeNum_ISUNSIGNED(num) && tmp->singleton->elsize == 8) { + to_timedelta_casting = NPY_SAME_KIND_CASTING; + } + else { + to_timedelta_casting = NPY_SAFE_CASTING; + } + } + if (PyArray_AddLegacyWrapping_CastingImpl( + tmp, timedelta, to_timedelta_casting) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + timedelta, tmp, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + } + + /* + * Cast times to string and unicode + */ + spec.casting = NPY_UNSAFE_CASTING; + /* + * Casts can error and need API (unicodes needs it for string->unicode). + * Unicode handling is currently implemented via a legacy cast. 
+ */ + spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &time_to_string_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + for (int num = NPY_DATETIME; num <= NPY_TIMEDELTA; num++) { + for (int str = NPY_STRING; str <= NPY_UNICODE; str++) { + dtypes[0] = PyArray_DTypeFromTypeNum(num); + dtypes[1] = PyArray_DTypeFromTypeNum(str); + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_SETREF(dtypes[0], NULL); + Py_SETREF(dtypes[1], NULL); + if (res < 0) { + return -1; + } + } + } + + /* + * Cast strings to timedelta are currently only legacy casts + */ + if (PyArray_AddLegacyWrapping_CastingImpl( + string, timedelta, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + unicode, timedelta, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + + /* + * Cast strings to datetime + */ + dtypes[1] = datetime; + spec.casting = NPY_UNSAFE_CASTING; + + /* The default type resolution should work fine. */ + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &string_to_datetime_cast_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + dtypes[0] = string; + spec.flags = NPY_METH_SUPPORTS_UNALIGNED; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + dtypes[0] = unicode; + /* + * Unicode handling is currently implemented via a legacy cast, which + * requires the Python API. 
+ */ + spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + result = 0; + fail: + Py_DECREF(datetime); + Py_DECREF(timedelta); + Py_DECREF(string); + Py_DECREF(unicode); + Py_XDECREF(tmp); + return result; +} + diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index af4e6c22e..630bd76f3 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -1006,9 +1006,8 @@ _strided_to_strided_string_to_datetime(char *dst, npy_intp dst_stride, /* * Assumes src_dtype and dst_dtype are both datetimes or both timedeltas */ -static int +NPY_NO_EXPORT int get_nbo_cast_datetime_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) @@ -1082,12 +1081,10 @@ get_nbo_cast_datetime_transfer_function(int aligned, return NPY_SUCCEED; } -static int -get_nbo_datetime_to_string_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) +NPY_NO_EXPORT int +get_nbo_datetime_to_string_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *src_meta; _strided_datetime_cast_data *data; @@ -1127,7 +1124,7 @@ get_nbo_datetime_to_string_transfer_function(int aligned, return NPY_SUCCEED; } -static int +NPY_NO_EXPORT int get_datetime_to_unicode_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1140,8 +1137,8 @@ get_datetime_to_unicode_transfer_function(int aligned, PyArray_Descr *str_dtype; /* Get an ASCII string data type, 
adapted to match the UNICODE one */ - str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(dst_dtype, str_dtype); + str_dtype = PyArray_DescrNewFromType(NPY_STRING); + str_dtype->elsize = dst_dtype->elsize / 4; if (str_dtype == NULL) { return NPY_FAIL; } @@ -1156,10 +1153,9 @@ get_datetime_to_unicode_transfer_function(int aligned, } /* Get the NBO datetime to string aligned contig function */ - if (get_nbo_datetime_to_string_transfer_function(1, - src_dtype->elsize, str_dtype->elsize, - src_dtype, str_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { + if (get_nbo_datetime_to_string_transfer_function( + src_dtype, str_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); NPY_AUXDATA_FREE(todata); return NPY_FAIL; @@ -1198,12 +1194,10 @@ get_datetime_to_unicode_transfer_function(int aligned, return NPY_SUCCEED; } -static int -get_nbo_string_to_datetime_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) +NPY_NO_EXPORT int +get_nbo_string_to_datetime_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *dst_meta; _strided_datetime_cast_data *data; @@ -1250,7 +1244,7 @@ get_nbo_string_to_datetime_transfer_function(int aligned, return NPY_SUCCEED; } -static int +NPY_NO_EXPORT int get_unicode_to_datetime_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1263,11 +1257,12 @@ get_unicode_to_datetime_transfer_function(int aligned, PyArray_Descr *str_dtype; /* Get an ASCII string data type, adapted to match the UNICODE one */ - str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(src_dtype, str_dtype); + str_dtype = 
PyArray_DescrNewFromType(NPY_STRING); if (str_dtype == NULL) { return NPY_FAIL; } + assert(src_dtype->type_num == NPY_UNICODE); + str_dtype->elsize = src_dtype->elsize / 4; /* Get the cast operation from src */ if (PyArray_GetDTypeTransferFunction(aligned, @@ -1281,10 +1276,9 @@ get_unicode_to_datetime_transfer_function(int aligned, } /* Get the string to NBO datetime aligned contig function */ - if (get_nbo_string_to_datetime_transfer_function(1, - str_dtype->elsize, dst_dtype->elsize, - str_dtype, dst_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { + if (get_nbo_string_to_datetime_transfer_function( + str_dtype, dst_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); NPY_AUXDATA_FREE(todata); return NPY_FAIL; @@ -1323,7 +1317,7 @@ get_unicode_to_datetime_transfer_function(int aligned, } -static int +NPY_NO_EXPORT int get_legacy_dtype_cast_function( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1502,7 +1496,6 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || !PyArray_ISNBO(dst_dtype->byteorder); return get_nbo_cast_datetime_transfer_function(aligned, - src_stride, dst_stride, src_dtype, dst_dtype, out_stransfer, out_transferdata); } @@ -1518,10 +1511,8 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_api = 1; *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder); return get_nbo_datetime_to_string_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); + src_dtype, dst_dtype, + out_stransfer, out_transferdata); case NPY_UNICODE: return get_datetime_to_unicode_transfer_function( @@ -1538,10 +1529,8 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_api = 1; *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder); return get_nbo_string_to_datetime_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, 
out_transferdata); + src_dtype, dst_dtype, + out_stransfer, out_transferdata); case NPY_UNICODE: return get_unicode_to_datetime_transfer_function( @@ -1561,7 +1550,7 @@ get_nbo_cast_transfer_function(int aligned, } -static int +NPY_NO_EXPORT int wrap_aligned_contig_transfer_function_with_copyswapn( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1570,7 +1559,7 @@ wrap_aligned_contig_transfer_function_with_copyswapn( PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata) { NpyAuxData *todata = NULL, *fromdata = NULL; - PyArray_StridedUnaryOp *tobuffer, *frombuffer; + PyArray_StridedUnaryOp *tobuffer = NULL, *frombuffer = NULL; npy_intp src_itemsize = src_dtype->elsize; npy_intp dst_itemsize = dst_dtype->elsize; @@ -3768,6 +3757,53 @@ PyArray_GetDTypeTransferFunction(int aligned, out_needs_api); } + +/* + * Basic version of PyArray_GetDTypeTransferFunction for legacy dtype + * support. + * It supports only wrapping the copyswapn functions and the legacy + * cast functions registered with `PyArray_RegisterCastFunc`. + * This function takes the easy way out: It does not wrap + */ +NPY_NO_EXPORT int +PyArray_GetLegacyDTypeTransferFunction(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api) +{ + /* Note: We ignore `needs_wrap`; needs-wrap is handled by another cast */ + int needs_wrap = 0; + + if (src_dtype->type_num == dst_dtype->type_num) { + /* + * This is a cast within the same dtype. For legacy user-dtypes, + * it is always valid to handle this using the copy swap function. 
+ */ + return wrap_copy_swap_function(aligned, + src_stride, dst_stride, + src_dtype, + PyArray_ISNBO(src_dtype->byteorder) != + PyArray_ISNBO(dst_dtype->byteorder), + out_stransfer, out_transferdata); + } + + if (get_legacy_dtype_cast_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + out_stransfer, + out_transferdata, + out_needs_api, + &needs_wrap) != NPY_SUCCEED) { + return NPY_FAIL; + } + return NPY_SUCCEED; +} + + NPY_NO_EXPORT int PyArray_GetMaskedDTypeTransferFunction(int aligned, npy_intp src_stride, diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index e63a60738..4c11723e7 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -27,6 +27,7 @@ dtypemeta_dealloc(PyArray_DTypeMeta *self) { Py_XDECREF(self->scalar_type); Py_XDECREF(self->singleton); + Py_XDECREF(self->castingimpls); PyType_Type.tp_dealloc((PyObject *) self); } @@ -565,6 +566,12 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) /* Let python finish the initialization (probably unnecessary) */ if (PyType_Ready((PyTypeObject *)dtype_class) < 0) { + Py_DECREF(dtype_class); + return -1; + } + dtype_class->castingimpls = PyDict_New(); + if (dtype_class->castingimpls == NULL) { + Py_DECREF(dtype_class); return -1; } diff --git a/numpy/core/src/multiarray/legacy_dtype_implementation.c b/numpy/core/src/multiarray/legacy_dtype_implementation.c new file mode 100644 index 000000000..3ce4710fd --- /dev/null +++ b/numpy/core/src/multiarray/legacy_dtype_implementation.c @@ -0,0 +1,716 @@ +/* + * This file hosts legacy implementations of certain functions for + * which alternatives exists, but the old functions are still required + * in certain code paths, or until the code transition is finalized. + * + * This code should typically not require modification, and if modified + * similar changes may be necessary in the new version. 
+ */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" +#include "scalartypes.h" +#include "_datetime.h" +#include "datetime_strings.h" +#include "convert_datatype.h" + +#include "legacy_dtype_implementation.h" + + +/* + * Compare the field dictionaries for two types. + * + * Return 1 if the field types and field names of the two descrs are equal and + * in the same order, 0 if not. + */ +static int +_equivalent_fields(PyArray_Descr *type1, PyArray_Descr *type2) { + + int val; + + if (type1->fields == type2->fields && type1->names == type2->names) { + return 1; + } + if (type1->fields == NULL || type2->fields == NULL) { + return 0; + } + + val = PyObject_RichCompareBool(type1->fields, type2->fields, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + val = PyObject_RichCompareBool(type1->names, type2->names, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + return 1; +} + +/* + * Compare the subarray data for two types. + * Return 1 if they are the same, 0 if not. 
+ */ +static int +_equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) +{ + int val; + + if (sub1 == sub2) { + return 1; + + } + if (sub1 == NULL || sub2 == NULL) { + return 0; + } + + val = PyObject_RichCompareBool(sub1->shape, sub2->shape, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + return PyArray_EquivTypes(sub1->base, sub2->base); +} + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) +{ + int type_num1, type_num2, size1, size2; + + if (type1 == type2) { + return NPY_TRUE; + } + + type_num1 = type1->type_num; + type_num2 = type2->type_num; + size1 = type1->elsize; + size2 = type2->elsize; + + if (size1 != size2) { + return NPY_FALSE; + } + if (PyArray_ISNBO(type1->byteorder) != PyArray_ISNBO(type2->byteorder)) { + return NPY_FALSE; + } + if (type1->subarray || type2->subarray) { + return ((type_num1 == type_num2) + && _equivalent_subarrays(type1->subarray, type2->subarray)); + } + if (type_num1 == NPY_VOID || type_num2 == NPY_VOID) { + return ((type_num1 == type_num2) && _equivalent_fields(type1, type2)); + } + if (type_num1 == NPY_DATETIME + || type_num1 == NPY_TIMEDELTA + || type_num2 == NPY_DATETIME + || type_num2 == NPY_TIMEDELTA) { + return ((type_num1 == type_num2) + && has_equivalent_datetime_metadata(type1, type2)); + } + return type1->kind == type2->kind; +} + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypenums(int typenum1, int typenum2) +{ + PyArray_Descr *d1, *d2; + npy_bool ret; + + if (typenum1 == typenum2) { + return NPY_SUCCEED; + } + + d1 = PyArray_DescrFromType(typenum1); + d2 = PyArray_DescrFromType(typenum2); + ret = PyArray_LegacyEquivTypes(d1, d2); + Py_DECREF(d1); + Py_DECREF(d2); + return ret; +} + + +NPY_NO_EXPORT int +PyArray_LegacyCanCastSafely(int fromtype, int totype) +{ + PyArray_Descr *from; + + /* Fast table lookup for small type numbers */ + if ((unsigned int)fromtype < NPY_NTYPES && + (unsigned int)totype < 
NPY_NTYPES) { + return _npy_can_cast_safely_table[fromtype][totype]; + } + + /* Identity */ + if (fromtype == totype) { + return 1; + } + + from = PyArray_DescrFromType(fromtype); + /* + * cancastto is a NPY_NOTYPE terminated C-int-array of types that + * the data-type can be cast to safely. + */ + if (from->f->cancastto) { + int *curtype = from->f->cancastto; + + while (*curtype != NPY_NOTYPE) { + if (*curtype++ == totype) { + return 1; + } + } + } + return 0; +} + + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTo(PyArray_Descr *from, PyArray_Descr *to) +{ + int from_type_num = from->type_num; + int to_type_num = to->type_num; + npy_bool ret; + + ret = (npy_bool) PyArray_LegacyCanCastSafely(from_type_num, to_type_num); + if (ret) { + /* Check String and Unicode more closely */ + if (from_type_num == NPY_STRING) { + if (to_type_num == NPY_STRING) { + ret = (from->elsize <= to->elsize); + } + else if (to_type_num == NPY_UNICODE) { + ret = (from->elsize << 2 <= to->elsize); + } + } + else if (from_type_num == NPY_UNICODE) { + if (to_type_num == NPY_UNICODE) { + ret = (from->elsize <= to->elsize); + } + } + /* + * For datetime/timedelta, only treat casts moving towards + * more precision as safe. 
+ */ + else if (from_type_num == NPY_DATETIME && to_type_num == NPY_DATETIME) { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + return can_cast_datetime64_metadata(meta1, meta2, + NPY_SAFE_CASTING); + } + else if (from_type_num == NPY_TIMEDELTA && + to_type_num == NPY_TIMEDELTA) { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + return can_cast_timedelta64_metadata(meta1, meta2, + NPY_SAFE_CASTING); + } + /* + * If to_type_num is STRING or unicode + * see if the length is long enough to hold the + * stringified value of the object. + */ + else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { + /* + * Boolean value cast to string type is 5 characters max + * for string 'False'. + */ + int char_size = 1; + if (to_type_num == NPY_UNICODE) { + char_size = 4; + } + + ret = 0; + if (PyDataType_ISUNSIZED(to)) { + ret = 1; + } + /* + * Need at least 5 characters to convert from boolean + * to 'True' or 'False'. 
+ */ + else if (from->kind == 'b' && to->elsize >= 5 * char_size) { + ret = 1; + } + else if (from->kind == 'u') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + else if (to->elsize >= + REQUIRED_STR_LEN[from->elsize] * char_size) { + ret = 1; + } + } + else if (from->kind == 'i') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + /* Extra character needed for sign */ + else if (to->elsize >= + (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { + ret = 1; + } + } + } + return ret; +} + + +/* + * Compare two field dictionaries for castability. + * + * Return 1 if 'field1' can be cast to 'field2' according to the rule + * 'casting', 0 if not. + * + * Castability of field dictionaries is defined recursively: 'field1' and + * 'field2' must have the same field names (possibly in different + * orders), and the corresponding field types must be castable according + * to the given casting rule. + */ +static int +can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) +{ + Py_ssize_t ppos; + PyObject *key; + PyObject *tuple1, *tuple2; + + if (field1 == field2) { + return 1; + } + if (field1 == NULL || field2 == NULL) { + return 0; + } + if (PyDict_Size(field1) != PyDict_Size(field2)) { + return 0; + } + + /* Iterate over all the fields and compare for castability */ + ppos = 0; + while (PyDict_Next(field1, &ppos, &key, &tuple1)) { + if ((tuple2 = PyDict_GetItem(field2, key)) == NULL) { + return 0; + } + /* Compare the dtype of the field for castability */ + if (!PyArray_CanCastTypeTo( + (PyArray_Descr *)PyTuple_GET_ITEM(tuple1, 0), + (PyArray_Descr *)PyTuple_GET_ITEM(tuple2, 0), + casting)) { + return 0; + } + } + + return 1; +} + + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, + NPY_CASTING casting) +{ + /* + * Fast paths for equality and for basic types.
+ */ + if (from == to || + ((NPY_LIKELY(PyDataType_ISNUMBER(from)) || + PyDataType_ISOBJECT(from)) && + NPY_LIKELY(from->type_num == to->type_num) && + NPY_LIKELY(from->byteorder == to->byteorder))) { + return 1; + } + /* + * Cases with subarrays and fields need special treatment. + */ + if (PyDataType_HASFIELDS(from)) { + /* + * If from is a structured data type, then it can be cast to a simple + * non-object one only for unsafe casting *and* if it has a single + * field; recurse just in case the single field is itself structured. + */ + if (!PyDataType_HASFIELDS(to) && !PyDataType_ISOBJECT(to)) { + if (casting == NPY_UNSAFE_CASTING && + PyDict_Size(from->fields) == 1) { + Py_ssize_t ppos = 0; + PyObject *tuple; + PyArray_Descr *field; + PyDict_Next(from->fields, &ppos, NULL, &tuple); + field = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); + /* + * For a subarray, we need to get the underlying type; + * since we already are casting unsafely, we can ignore + * the shape. + */ + if (PyDataType_HASSUBARRAY(field)) { + field = field->subarray->base; + } + return PyArray_LegacyCanCastTypeTo(field, to, casting); + } + else { + return 0; + } + } + /* + * Casting from one structured data type to another depends on the fields; + * we pass that case on to the EquivTypenums case below. + * + * TODO: move that part up here? Need to check whether equivalent type + * numbers is an addition constraint that is needed. + * + * TODO/FIXME: For now, always allow structured to structured for unsafe + * casting; this is not correct, but needed since the treatment in can_cast + * below got out of sync with astype; see gh-13667. + */ + if (casting == NPY_UNSAFE_CASTING) { + return 1; + } + } + else if (PyDataType_HASFIELDS(to)) { + /* + * If "from" is a simple data type and "to" has fields, then only + * unsafe casting works (and that works always, even to multiple fields). + */ + return casting == NPY_UNSAFE_CASTING; + } + /* + * Everything else we consider castable for unsafe for now. 
+ * FIXME: ensure what we do here is consistent with "astype", + * i.e., deal more correctly with subarrays and user-defined dtype. + */ + else if (casting == NPY_UNSAFE_CASTING) { + return 1; + } + /* + * Equivalent simple types can be cast with any value of 'casting', but + * we need to be careful about structured to structured. + */ + if (PyArray_LegacyEquivTypenums(from->type_num, to->type_num)) { + /* For complicated case, use EquivTypes (for now) */ + if (PyTypeNum_ISUSERDEF(from->type_num) || + from->subarray != NULL) { + int ret; + + /* Only NPY_NO_CASTING prevents byte order conversion */ + if ((casting != NPY_NO_CASTING) && + (!PyArray_ISNBO(from->byteorder) || + !PyArray_ISNBO(to->byteorder))) { + PyArray_Descr *nbo_from, *nbo_to; + + nbo_from = PyArray_DescrNewByteorder(from, NPY_NATIVE); + nbo_to = PyArray_DescrNewByteorder(to, NPY_NATIVE); + if (nbo_from == NULL || nbo_to == NULL) { + Py_XDECREF(nbo_from); + Py_XDECREF(nbo_to); + PyErr_Clear(); + return 0; + } + ret = PyArray_LegacyEquivTypes(nbo_from, nbo_to); + Py_DECREF(nbo_from); + Py_DECREF(nbo_to); + } + else { + ret = PyArray_LegacyEquivTypes(from, to); + } + return ret; + } + + if (PyDataType_HASFIELDS(from)) { + switch (casting) { + case NPY_EQUIV_CASTING: + case NPY_SAFE_CASTING: + case NPY_SAME_KIND_CASTING: + /* + * `from' and `to' must have the same fields, and + * corresponding fields must be (recursively) castable. 
+ */ + return can_cast_fields(from->fields, to->fields, casting); + + case NPY_NO_CASTING: + default: + return PyArray_LegacyEquivTypes(from, to); + } + } + + switch (from->type_num) { + case NPY_DATETIME: { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + if (casting == NPY_NO_CASTING) { + return PyArray_ISNBO(from->byteorder) == + PyArray_ISNBO(to->byteorder) && + can_cast_datetime64_metadata(meta1, meta2, casting); + } + else { + return can_cast_datetime64_metadata(meta1, meta2, casting); + } + } + case NPY_TIMEDELTA: { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + if (casting == NPY_NO_CASTING) { + return PyArray_ISNBO(from->byteorder) == + PyArray_ISNBO(to->byteorder) && + can_cast_timedelta64_metadata(meta1, meta2, casting); + } + else { + return can_cast_timedelta64_metadata(meta1, meta2, casting); + } + } + default: + switch (casting) { + case NPY_NO_CASTING: + return PyArray_LegacyEquivTypes(from, to); + case NPY_EQUIV_CASTING: + return (from->elsize == to->elsize); + case NPY_SAFE_CASTING: + return (from->elsize <= to->elsize); + default: + return 1; + } + break; + } + } + /* If safe or same-kind casts are allowed */ + else if (casting == NPY_SAFE_CASTING || casting == NPY_SAME_KIND_CASTING) { + if (PyArray_LegacyCanCastTo(from, to)) { + return 1; + } + else if(casting == NPY_SAME_KIND_CASTING) { + /* + * Also allow casting from lower to higher kinds, according + * to the ordering provided by dtype_kind_to_ordering. + * Some kinds, like datetime, don't fit in the hierarchy, + * and are special cased as -1. 
+ */ + int from_order, to_order; + + from_order = dtype_kind_to_ordering(from->kind); + to_order = dtype_kind_to_ordering(to->kind); + + if (to->kind == 'm') { + /* both types being timedelta is already handled before. */ + int integer_order = dtype_kind_to_ordering('i'); + return (from_order != -1) && (from_order <= integer_order); + } + + return (from_order != -1) && (from_order <= to_order); + } + else { + return 0; + } + } + /* NPY_NO_CASTING or NPY_EQUIV_CASTING was specified */ + else { + return 0; + } +} + + +/* + * Legacy function to find the correct dtype when casting from any built-in + * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic + * units. + * + * This function returns a dtype based on flex_dtype and the values in + * data_dtype. It also calls Py_DECREF on the flex_dtype. If the + * flex_dtype is not flexible, it returns it as-is. + * + * Usually, if data_obj is not an array, dtype should be the result + * given by the PyArray_GetArrayParamsFromObject function. + * + * If *flex_dtype is NULL, returns immediately, without setting an + * exception, leaving any previous error handling intact. + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype) +{ + PyArray_DatetimeMetaData *meta; + PyArray_Descr *retval = NULL; + int flex_type_num; + + if (flex_dtype == NULL) { + return retval; + } + + flex_type_num = flex_dtype->type_num; + + /* Flexible types with expandable size */ + if (PyDataType_ISUNSIZED(flex_dtype)) { + /* First replace the flex_dtype */ + retval = PyArray_DescrNew(flex_dtype); + Py_DECREF(flex_dtype); + if (retval == NULL) { + return retval; + } + + if (data_dtype->type_num == flex_type_num || + flex_type_num == NPY_VOID) { + (retval)->elsize = data_dtype->elsize; + } + else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { + npy_intp size = 8; + + /* + * Get a string-size estimate of the input. 
These + * are generally the size needed, rounded up to + * a multiple of eight. + */ + switch (data_dtype->type_num) { + case NPY_BOOL: + case NPY_UBYTE: + case NPY_BYTE: + case NPY_USHORT: + case NPY_SHORT: + case NPY_UINT: + case NPY_INT: + case NPY_ULONG: + case NPY_LONG: + case NPY_ULONGLONG: + case NPY_LONGLONG: + if (data_dtype->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (data_dtype->elsize > 8 || + data_dtype->elsize < 0) { + /* + * Element size should never be greater than 8 or + * less than 0 for integer type, but just in case... + */ + break; + } + else if (data_dtype->kind == 'u') { + size = REQUIRED_STR_LEN[data_dtype->elsize]; + } + else if (data_dtype->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; + } + break; + case NPY_HALF: + case NPY_FLOAT: + case NPY_DOUBLE: + size = 32; + break; + case NPY_LONGDOUBLE: + size = 48; + break; + case NPY_CFLOAT: + case NPY_CDOUBLE: + size = 2 * 32; + break; + case NPY_CLONGDOUBLE: + size = 2 * 48; + break; + case NPY_OBJECT: + size = 64; + break; + case NPY_STRING: + case NPY_VOID: + size = data_dtype->elsize; + break; + case NPY_UNICODE: + size = data_dtype->elsize / 4; + break; + case NPY_DATETIME: + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + Py_DECREF(retval); + return NULL; + } + size = get_datetime_iso_8601_strlen(0, meta->base); + break; + case NPY_TIMEDELTA: + size = 21; + break; + } + + if (flex_type_num == NPY_STRING) { + retval->elsize = size; + } + else if (flex_type_num == NPY_UNICODE) { + retval->elsize = size * 4; + } + } + else { + /* + * We should never get here, but just in case someone adds + * a new flex dtype...
+ */ + PyErr_SetString(PyExc_TypeError, + "don't know how to adapt flex dtype"); + Py_DECREF(retval); + return NULL; + } + } + /* Flexible type with generic time unit that adapts */ + else if (flex_type_num == NPY_DATETIME || + flex_type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(flex_dtype); + retval = flex_dtype; + if (meta == NULL) { + return NULL; + } + + if (meta->base == NPY_FR_GENERIC) { + if (data_dtype->type_num == NPY_DATETIME || + data_dtype->type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + return NULL; + } + + retval = create_datetime_dtype(flex_type_num, meta); + Py_DECREF(flex_dtype); + } + } + } + else { + retval = flex_dtype; + } + return retval; +} diff --git a/numpy/core/src/multiarray/legacy_dtype_implementation.h b/numpy/core/src/multiarray/legacy_dtype_implementation.h new file mode 100644 index 000000000..ca171d773 --- /dev/null +++ b/numpy/core/src/multiarray/legacy_dtype_implementation.h @@ -0,0 +1,40 @@ +#ifndef _NPY_LEGACY_DTYPE_IMPLEMENTATION_H +#define _NPY_LEGACY_DTYPE_IMPLEMENTATION_H + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypes(PyArray_Descr *type1, PyArray_Descr *type2); + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypenums(int typenum1, int typenum2); + +NPY_NO_EXPORT int +PyArray_LegacyCanCastSafely(int fromtype, int totype); + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTo(PyArray_Descr *from, PyArray_Descr *to); + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, + NPY_CASTING casting); + +/* + * This function calls Py_DECREF on flex_dtype, and replaces it with + * a new dtype that has been adapted based on the values in data_dtype + * and data_obj. If the flex_dtype is not flexible, it returns it as-is. + * + * Usually, if data_obj is not an array, dtype should be the result + * given by the PyArray_GetArrayParamsFromObject function. 
+ * + * The data_obj may be NULL if just a dtype is known for the source. + * + * If *flex_dtype is NULL, returns immediately, without setting an + * exception, leaving any previous error handling intact. + * + * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, + * and NPY_DATETIME with generic units. + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); + +#endif /*_NPY_LEGACY_DTYPE_IMPLEMENTATION_H*/ diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 76df2337b..9c8bb4135 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -2180,7 +2180,7 @@ static PyObject * array_sizeof(PyArrayObject *self) { /* object + dimension and strides */ - Py_ssize_t nbytes = NPY_SIZEOF_PYARRAYOBJECT + + Py_ssize_t nbytes = Py_TYPE(self)->tp_basicsize + PyArray_NDIM(self) * sizeof(npy_intp) * 2; if (PyArray_CHKFLAGS(self, NPY_ARRAY_OWNDATA)) { nbytes += PyArray_NBYTES(self); diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 1aad70dc6..af5949e73 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -30,6 +30,8 @@ #include "npy_config.h" #include "npy_pycompat.h" #include "npy_import.h" +#include "convert_datatype.h" +#include "legacy_dtype_implementation.h" NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; @@ -1480,65 +1482,6 @@ array_putmask(PyObject *NPY_UNUSED(module), PyObject *args, PyObject *kwds) return PyArray_PutMask((PyArrayObject *)array, values, mask); } -/* - * Compare the field dictionaries for two types. - * - * Return 1 if the field types and field names of the two descrs are equal and - * in the same order, 0 if not. 
- */ -static int -_equivalent_fields(PyArray_Descr *type1, PyArray_Descr *type2) { - - int val; - - if (type1->fields == type2->fields && type1->names == type2->names) { - return 1; - } - if (type1->fields == NULL || type2->fields == NULL) { - return 0; - } - - val = PyObject_RichCompareBool(type1->fields, type2->fields, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - val = PyObject_RichCompareBool(type1->names, type2->names, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - return 1; -} - -/* - * Compare the subarray data for two types. - * Return 1 if they are the same, 0 if not. - */ -static int -_equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) -{ - int val; - - if (sub1 == sub2) { - return 1; - - } - if (sub1 == NULL || sub2 == NULL) { - return 0; - } - - val = PyObject_RichCompareBool(sub1->shape, sub2->shape, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - return PyArray_EquivTypes(sub1->base, sub2->base); -} - /*NUMPY_API * @@ -1548,40 +1491,24 @@ _equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) NPY_NO_EXPORT unsigned char PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) { - int type_num1, type_num2, size1, size2; - - if (type1 == type2) { - return NPY_TRUE; - } - - type_num1 = type1->type_num; - type_num2 = type2->type_num; - size1 = type1->elsize; - size2 = type2->elsize; - - if (size1 != size2) { - return NPY_FALSE; - } - if (PyArray_ISNBO(type1->byteorder) != PyArray_ISNBO(type2->byteorder)) { - return NPY_FALSE; - } - if (type1->subarray || type2->subarray) { - return ((type_num1 == type_num2) - && _equivalent_subarrays(type1->subarray, type2->subarray)); - } - if (type_num1 == NPY_VOID || type_num2 == NPY_VOID) { - return ((type_num1 == type_num2) && _equivalent_fields(type1, type2)); - } - if (type_num1 == NPY_DATETIME - || type_num1 == NPY_TIMEDELTA - || type_num2 == NPY_DATETIME - || 
type_num2 == NPY_TIMEDELTA) { - return ((type_num1 == type_num2) - && has_equivalent_datetime_metadata(type1, type2)); +#if NPY_USE_NEW_CASTINGIMPL + /* + * Do not use PyArray_CanCastTypeTo because it supports legacy flexible + * dtypes as input. + */ + NPY_CASTING safety = PyArray_GetCastSafety(type1, type2, NULL); + if (safety < 0) { + PyErr_Clear(); + return 0; } - return type1->kind == type2->kind; + /* If casting is "no casting" this dtypes are considered equivalent. */ + return PyArray_MinCastSafety(safety, NPY_NO_CASTING) == NPY_NO_CASTING; +#else + return PyArray_LegacyEquivTypes(type1, type2); +#endif } + /*NUMPY_API*/ NPY_NO_EXPORT unsigned char PyArray_EquivTypenums(int typenum1, int typenum2) @@ -2003,20 +1930,41 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) int alloc = 0; void *dptr; PyObject *ret; - + PyObject *base = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O:scalar", kwlist, &PyArrayDescr_Type, &typecode, &obj)) { return NULL; } if (PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) { - if (!PySequence_Check(obj)) { - PyErr_SetString(PyExc_TypeError, - "found non-sequence while unpickling scalar with " - "NPY_LIST_PICKLE set"); + if (typecode->type_num == NPY_OBJECT) { + /* Deprecated 2020-11-24, NumPy 1.20 */ + if (DEPRECATE( + "Unpickling a scalar with object dtype is deprecated. " + "Object scalars should never be created. If this was a " + "properly created pickle, please open a NumPy issue. In " + "a best effort this returns the original object.") < 0) { + return NULL; + } + Py_INCREF(obj); + return obj; + } + /* We store the full array to unpack it here: */ + if (!PyArray_CheckExact(obj)) { + /* We pickle structured voids as arrays currently */ + PyErr_SetString(PyExc_RuntimeError, + "Unpickling NPY_LIST_PICKLE (structured void) scalar " + "requires an array. 
The pickle file may be corrupted?"); return NULL; } - dptr = &obj; + if (!PyArray_EquivTypes(PyArray_DESCR((PyArrayObject *)obj), typecode)) { + PyErr_SetString(PyExc_RuntimeError, + "Pickled array is not compatible with requested scalar " + "dtype. The pickle file may be corrupted?"); + return NULL; + } + base = obj; + dptr = PyArray_BYTES((PyArrayObject *)obj); } else if (PyDataType_FLAGCHK(typecode, NPY_ITEM_IS_POINTER)) { @@ -2066,7 +2014,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) dptr = PyBytes_AS_STRING(obj); } } - ret = PyArray_Scalar(dptr, typecode, NULL); + ret = PyArray_Scalar(dptr, typecode, base); /* free dptr which contains zeros */ if (alloc) { @@ -4299,6 +4247,8 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS, NULL}, {"_discover_array_parameters", (PyCFunction)_discover_array_parameters, METH_VARARGS | METH_KEYWORDS, NULL}, + {"_get_castingimpl", (PyCFunction)_get_castingimpl, + METH_VARARGS | METH_KEYWORDS, NULL}, /* from umath */ {"frompyfunc", (PyCFunction) ufunc_frompyfunc, @@ -4317,6 +4267,7 @@ static struct PyMethodDef array_module_methods[] = { }; #include "__multiarray_api.c" +#include "array_method.h" /* Establish scalar-type hierarchy * @@ -4767,9 +4718,20 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { if (set_typeinfo(d) != 0) { goto err; } + if (PyType_Ready(&PyArrayMethod_Type) < 0) { + goto err; + } + if (PyType_Ready(&PyBoundArrayMethod_Type) < 0) { + goto err; + } if (initialize_and_map_pytypes_to_dtypes() < 0) { goto err; } + + if (PyArray_InitializeCasts() < 0) { + goto err; + } + if (initumath(m) != 0) { goto err; } diff --git a/numpy/core/src/multiarray/npy_buffer.h b/numpy/core/src/multiarray/npy_buffer.h index 5ff8b6c2c..d10f1a020 100644 --- a/numpy/core/src/multiarray/npy_buffer.h +++ b/numpy/core/src/multiarray/npy_buffer.h @@ -3,8 +3,8 @@ extern NPY_NO_EXPORT PyBufferProcs array_as_buffer; -NPY_NO_EXPORT void -_dealloc_cached_buffer_info(PyObject *self); 
+NPY_NO_EXPORT int +_buffer_info_free(void *buffer_info, PyObject *obj); NPY_NO_EXPORT PyArray_Descr* _descriptor_from_pep3118_format(char const *s); diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index f04bdbaa8..d018fccbb 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -67,8 +67,11 @@ gentype_alloc(PyTypeObject *type, Py_ssize_t nitems) const size_t size = _PyObject_VAR_SIZE(type, nitems + 1); obj = (PyObject *)PyObject_Malloc(size); + if (obj == NULL) { + PyErr_NoMemory(); + return NULL; + } /* - * Fixme. Need to check for no memory. * If we don't need to zero memory, we could use * PyObject_{New, NewVar} for this whole function. */ @@ -1742,13 +1745,8 @@ gentype_reduce(PyObject *self, PyObject *NPY_UNUSED(args)) if (arr == NULL) { return NULL; } - /* arr.item() */ - PyObject *val = PyArray_GETITEM(arr, PyArray_DATA(arr)); - Py_DECREF(arr); - if (val == NULL) { - return NULL; - } - PyObject *tup = Py_BuildValue("NN", obj, val); + /* Use the whole array which handles structured void correctly */ + PyObject *tup = Py_BuildValue("NN", obj, arr); if (tup == NULL) { return NULL; } @@ -2601,16 +2599,18 @@ NPY_NO_EXPORT PyTypeObject PyGenericArrType_Type = { .tp_basicsize = sizeof(PyObject), }; + static void void_dealloc(PyVoidScalarObject *v) { - _dealloc_cached_buffer_info((PyObject *)v); - if (v->flags & NPY_ARRAY_OWNDATA) { npy_free_cache(v->obval, Py_SIZE(v)); } Py_XDECREF(v->descr); Py_XDECREF(v->base); + if (_buffer_info_free(v->_buffer_info, (PyObject *)v) < 0) { + PyErr_WriteUnraisable(NULL); + } Py_TYPE(v)->tp_free(v); } diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c index 1404c9b68..3eaf99196 100644 --- a/numpy/core/src/multiarray/usertypes.c +++ b/numpy/core/src/multiarray/usertypes.c @@ -39,6 +39,10 @@ maintainer email: oliphant.travis@ieee.org #include "usertypes.h" #include "dtypemeta.h"
#include "scalartypes.h" +#include "array_method.h" +#include "convert_datatype.h" +#include "legacy_dtype_implementation.h" + NPY_NO_EXPORT PyArray_Descr **userdescrs=NULL; @@ -488,3 +492,65 @@ legacy_userdtype_common_dtype_function( Py_INCREF(Py_NotImplemented); return (PyArray_DTypeMeta *)Py_NotImplemented; } + + +/** + * This function wraps a legacy cast into an array-method. This is mostly + * used for legacy user-dtypes, but for example numeric to/from datetime + * casts were only defined that way as well. + * + * @param from + * @param to + * @param casting If `NPY_NO_CASTING` will check the legacy registered cast, + * otherwise uses the provided cast. + */ +NPY_NO_EXPORT int +PyArray_AddLegacyWrapping_CastingImpl( + PyArray_DTypeMeta *from, PyArray_DTypeMeta *to, NPY_CASTING casting) +{ + if (casting < 0) { + if (from == to) { + casting = NPY_NO_CASTING; + } + else if (PyArray_LegacyCanCastTypeTo( + from->singleton, to->singleton, NPY_SAFE_CASTING)) { + casting = NPY_SAFE_CASTING; + } + else if (PyArray_LegacyCanCastTypeTo( + from->singleton, to->singleton, NPY_SAME_KIND_CASTING)) { + casting = NPY_SAME_KIND_CASTING; + } + else { + casting = NPY_UNSAFE_CASTING; + } + } + + PyArray_DTypeMeta *dtypes[2] = {from, to}; + PyArrayMethod_Spec spec = { + /* Name is not actually used, but allows identifying these. 
*/ + .name = "legacy_cast", + .nin = 1, + .nout = 1, + .casting = casting, + .dtypes = dtypes, + }; + + if (from == to) { + spec.flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &legacy_same_dtype_resolve_descriptors}, + {0, NULL}}; + spec.slots = slots; + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); + } + else { + spec.flags = NPY_METH_REQUIRES_PYAPI; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &simple_cast_resolve_descriptors}, + {0, NULL}}; + spec.slots = slots; + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); + } +} diff --git a/numpy/core/src/multiarray/usertypes.h b/numpy/core/src/multiarray/usertypes.h index 1b323d458..8b2fc80e6 100644 --- a/numpy/core/src/multiarray/usertypes.h +++ b/numpy/core/src/multiarray/usertypes.h @@ -1,6 +1,8 @@ #ifndef _NPY_PRIVATE_USERTYPES_H_ #define _NPY_PRIVATE_USERTYPES_H_ +#include "array_method.h" + extern NPY_NO_EXPORT PyArray_Descr **userdescrs; NPY_NO_EXPORT void @@ -21,4 +23,8 @@ NPY_NO_EXPORT PyArray_DTypeMeta * legacy_userdtype_common_dtype_function( PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other); +NPY_NO_EXPORT int +PyArray_AddLegacyWrapping_CastingImpl( + PyArray_DTypeMeta *from, PyArray_DTypeMeta *to, NPY_CASTING casting); + #endif diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index 18b6d1434..ff4663dc3 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -398,8 +398,8 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) /**end repeat1**/ /**begin repeat1 - * #kind = atan2,hypot,pow,fmod,copysign# - * #KIND = ATAN2,HYPOT,POW,FMOD,COPYSIGN# + * #kind = atan2,hypot,pow,copysign# + * #KIND = ATAN2,HYPOT,POW,COPYSIGN# */ #ifdef @kind@@c@ #undef @kind@@c@ @@ -412,6 +412,32 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) 
#endif /**end repeat1**/ +/**begin repeat1 + * #kind = fmod# + * #KIND = FMOD# + */ +#ifdef @kind@@c@ +#undef @kind@@c@ +#endif +#ifndef HAVE_MODF@C@ +NPY_INPLACE @type@ +npy_@kind@@c@(@type@ x, @type@ y) +{ + int are_inputs_inf = (npy_isinf(x) && npy_isinf(y)); + /* force set invalid flag, doesnt raise by default on gcc < 8 */ + if (npy_isnan(x) || npy_isnan(y)) { + npy_set_floatstatus_invalid(); + } + if (are_inputs_inf || !y) { + if (!npy_isnan(x)) { + npy_set_floatstatus_invalid(); + } + } + return (@type@) npy_@kind@((double)x, (double) y); +} +#endif +/**end repeat1**/ + #ifdef modf@c@ #undef modf@c@ #endif @@ -473,8 +499,8 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) /**end repeat1**/ /**begin repeat1 - * #kind = atan2,hypot,pow,fmod,copysign# - * #KIND = ATAN2,HYPOT,POW,FMOD,COPYSIGN# + * #kind = atan2,hypot,pow,copysign# + * #KIND = ATAN2,HYPOT,POW,COPYSIGN# */ #ifdef HAVE_@KIND@@C@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) @@ -484,6 +510,29 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) #endif /**end repeat1**/ +/**begin repeat1 + * #kind = fmod# + * #KIND = FMOD# + */ +#ifdef HAVE_FMOD@C@ +NPY_INPLACE @type@ +npy_@kind@@c@(@type@ x, @type@ y) +{ + int are_inputs_inf = (npy_isinf(x) && npy_isinf(y)); + /* force set invalid flag, doesnt raise by default on gcc < 8 */ + if (npy_isnan(x) || npy_isnan(y)) { + npy_set_floatstatus_invalid(); + } + if (are_inputs_inf || !y) { + if (!npy_isnan(x)) { + npy_set_floatstatus_invalid(); + } + } + return @kind@@c@(x, y); +} +#endif +/**end repeat1**/ + #ifdef HAVE_MODF@C@ NPY_INPLACE @type@ npy_modf@c@(@type@ x, @type@ *iptr) { @@ -625,6 +674,38 @@ NPY_INPLACE @type@ npy_logaddexp2@c@(@type@ x, @type@ y) } /* + * Wrapper function for remainder edge cases + * Internally calls npy_divmod* + */ +NPY_INPLACE @type@ +npy_remainder@c@(@type@ a, @type@ b) +{ + @type@ mod; + if (NPY_UNLIKELY(!b)) { + mod = npy_fmod@c@(a, b); + } else { + npy_divmod@c@(a, b, &mod); + } + return mod; +} + +NPY_INPLACE @type@ 
+npy_floor_divide@c@(@type@ a, @type@ b) { + @type@ div, mod; + if (NPY_UNLIKELY(!b)) { + div = a / b; + if (!a || npy_isnan(a)) { + npy_set_floatstatus_invalid(); + } else { + npy_set_floatstatus_divbyzero(); + } + } else { + div = npy_divmod@c@(a, b, &mod); + } + return div; +} + +/* * Python version of divmod. * * The implementation is mostly copied from cpython 3.5. @@ -634,12 +715,19 @@ npy_divmod@c@(@type@ a, @type@ b, @type@ *modulus) { @type@ div, mod, floordiv; + /* force set invalid flag, doesnt raise by default on gcc < 8 */ + if (npy_isnan(a) || npy_isnan(b)) { + npy_set_floatstatus_invalid(); + } mod = npy_fmod@c@(a, b); - - if (!b) { + if (NPY_UNLIKELY(!b)) { + div = a / b; + if (a && !npy_isnan(a)) { + npy_set_floatstatus_divbyzero(); + } /* If b == 0, return result of fmod. For IEEE is nan */ *modulus = mod; - return mod; + return div; } /* a - mod should be very nearly an integer multiple of b */ diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index c9efdeb4e..c2e06a4fd 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1955,8 +1955,7 @@ NPY_NO_EXPORT void BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; const @type@ in2 = *(@type@ *)ip2; - @type@ mod; - *((@type@ *)op1) = npy_divmod@c@(in1, in2, &mod); + *((@type@ *)op1) = npy_floor_divide@c@(in1, in2); } } @@ -1966,7 +1965,7 @@ NPY_NO_EXPORT void BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; const @type@ in2 = *(@type@ *)ip2; - npy_divmod@c@(in1, in2, (@type@ *)op1); + *((@type@ *) op1) = npy_remainder@c@(in1, in2); } } @@ -2306,8 +2305,13 @@ HALF_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps BINARY_LOOP { const npy_half in1 = *(npy_half *)ip1; const npy_half in2 = *(npy_half *)ip2; - npy_half mod; - *((npy_half *)op1) = npy_half_divmod(in1, in2, &mod); + + float fh1 = npy_half_to_float(in1); + float fh2 = npy_half_to_float(in2); + float div; + + div = npy_floor_dividef(fh1, fh2); + *((npy_half 
*)op1) = npy_float_to_half(div); } } @@ -2317,7 +2321,11 @@ HALF_remainder(char **args, npy_intp const *dimensions, npy_intp const *steps, v BINARY_LOOP { const npy_half in1 = *(npy_half *)ip1; const npy_half in2 = *(npy_half *)ip2; - npy_half_divmod(in1, in2, (npy_half *)op1); + float fh1 = npy_half_to_float(in1); + float fh2 = npy_half_to_float(in2); + float mod; + mod = npy_remainderf(fh1, fh2); + *((npy_half *)op1) = npy_float_to_half(mod); } } diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index 55bc958cb..86dade0f1 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -285,7 +285,11 @@ static void @name@_ctype_floor_divide(@type@ a, @type@ b, @type@ *out) { @type@ mod; - *out = npy_divmod@c@(a, b, &mod); + if (!b) { + *out = a / b; + } else { + *out = npy_divmod@c@(a, b, &mod); + } } @@ -318,7 +322,11 @@ static void half_ctype_floor_divide(npy_half a, npy_half b, npy_half *out) { npy_half mod; - *out = npy_half_divmod(a, b, &mod); + if (!b) { + *out = a / b; + } else { + *out = npy_half_divmod(a, b, &mod); + } } diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 24730f969..0f42f7076 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -281,6 +281,19 @@ def test_array_astype(): a = np.array(1000, dtype='i4') assert_raises(TypeError, a.astype, 'U1', casting='safe') + +@pytest.mark.parametrize("dt", ["d", "f", "S13", "U32"]) +def test_array_astype_to_void(dt): + dt = np.dtype(dt) + arr = np.array([], dtype=dt) + assert arr.astype("V").dtype.itemsize == dt.itemsize + +def test_object_array_astype_to_void(): + # This is different to `test_array_astype_to_void` as object arrays + # are inspected. 
The default void is "V8" (8 is the length of double) + arr = np.array([], dtype="O").astype("V") + assert arr.dtype == "V8" + @pytest.mark.parametrize("t", np.sctypes['uint'] + np.sctypes['int'] + np.sctypes['float'] ) diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py new file mode 100644 index 000000000..fec0ae7c7 --- /dev/null +++ b/numpy/core/tests/test_casting_unittests.py @@ -0,0 +1,301 @@ +""" +The tests exercise the casting machinery in a more low-level manner. +The reason is mostly to test a new implementation of the casting machinery. + +Unlike most tests in NumPy, these are closer to unit-tests rather +than integration tests. +""" + +import pytest +import textwrap +import enum + +import numpy as np + +from numpy.core._multiarray_umath import ( + _get_castingimpl as get_castingimpl) +from numpy.core._multiarray_tests import uses_new_casts + + +# Simple skips object, parametric and long double (unsupported by struct) +simple_dtypes = "?bhilqBHILQefdFD" +if np.dtype("l").itemsize != np.dtype("q").itemsize: + # Remove l and L, the table was generated with 64bit linux in mind. + # TODO: Should have two tables or no a different solution. + simple_dtypes = simple_dtypes.replace("l", "").replace("L", "") +simple_dtypes = [type(np.dtype(c)) for c in simple_dtypes] + + +def simple_dtype_instances(): + for dtype_class in simple_dtypes: + dt = dtype_class() + yield pytest.param(dt, id=str(dt)) + if dt.byteorder != "|": + dt = dt.newbyteorder() + yield pytest.param(dt, id=str(dt)) + + +def get_expected_stringlength(dtype): + """Returns the string length when casting the basic dtypes to strings. 
+ """ + if dtype == np.bool_: + return 5 + if dtype.kind in "iu": + if dtype.itemsize == 1: + length = 3 + elif dtype.itemsize == 2: + length = 5 + elif dtype.itemsize == 4: + length = 10 + elif dtype.itemsize == 8: + length = 20 + else: + raise AssertionError(f"did not find expected length for {dtype}") + + if dtype.kind == "i": + length += 1 # adds one character for the sign + + return length + + # Note: Can't do dtype comparison for longdouble on windows + if dtype.char == "g": + return 48 + elif dtype.char == "G": + return 48 * 2 + elif dtype.kind == "f": + return 32 # also for half apparently. + elif dtype.kind == "c": + return 32 * 2 + + raise AssertionError(f"did not find expected length for {dtype}") + + +class Casting(enum.IntEnum): + no = 0 + equiv = 1 + safe = 2 + same_kind = 3 + unsafe = 4 + cast_is_view = 1 << 16 + + +def _get_cancast_table(): + table = textwrap.dedent(""" + X ? b h i l q B H I L Q e f d g F D G S U V O M m + ? # = = = = = = = = = = = = = = = = = = = = = . = + b . # = = = = . . . . . = = = = = = = = = = = . = + h . ~ # = = = . . . . . ~ = = = = = = = = = = . = + i . ~ ~ # = = . . . . . ~ ~ = = ~ = = = = = = . = + l . ~ ~ ~ # # . . . . . ~ ~ = = ~ = = = = = = . = + q . ~ ~ ~ # # . . . . . ~ ~ = = ~ = = = = = = . = + B . ~ = = = = # = = = = = = = = = = = = = = = . = + H . ~ ~ = = = ~ # = = = ~ = = = = = = = = = = . = + I . ~ ~ ~ = = ~ ~ # = = ~ ~ = = ~ = = = = = = . = + L . ~ ~ ~ ~ ~ ~ ~ ~ # # ~ ~ = = ~ = = = = = = . ~ + Q . ~ ~ ~ ~ ~ ~ ~ ~ # # ~ ~ = = ~ = = = = = = . ~ + e . . . . . . . . . . . # = = = = = = = = = = . . + f . . . . . . . . . . . ~ # = = = = = = = = = . . + d . . . . . . . . . . . ~ ~ # = ~ = = = = = = . . + g . . . . . . . . . . . ~ ~ ~ # ~ ~ = = = = = . . + F . . . . . . . . . . . . . . . # = = = = = = . . + D . . . . . . . . . . . . . . . ~ # = = = = = . . + G . . . . . . . . . . . . . . . ~ ~ # = = = = . . + S . . . . . . . . . . . . . . . . . . # = = = . . + U . . . . . . . . . . . . . . . . . . . # = = . . + V . . 
. . . . . . . . . . . . . . . . . . # = . . + O . . . . . . . . . . . . . . . . . . . . = # . . + M . . . . . . . . . . . . . . . . . . . . = = # . + m . . . . . . . . . . . . . . . . . . . . = = . # + """).strip().split("\n") + dtypes = [type(np.dtype(c)) for c in table[0][2::2]] + + convert_cast = {".": Casting.unsafe, "~": Casting.same_kind, + "=": Casting.safe, "#": Casting.equiv, + " ": -1} + + cancast = {} + for from_dt, row in zip(dtypes, table[1:]): + cancast[from_dt] = {} + for to_dt, c in zip(dtypes, row[2::2]): + cancast[from_dt][to_dt] = convert_cast[c] + + return cancast + +CAST_TABLE = _get_cancast_table() + + +class TestChanges: + """ + These test cases excercise some behaviour changes + """ + @pytest.mark.parametrize("string", ["S", "U"]) + @pytest.mark.parametrize("floating", ["e", "f", "d", "g"]) + def test_float_to_string(self, floating, string): + assert np.can_cast(floating, string) + # 100 is long enough to hold any formatted floating + if uses_new_casts(): + assert np.can_cast(floating, f"{string}100") + else: + assert not np.can_cast(floating, f"{string}100") + assert np.can_cast(floating, f"{string}100", casting="same_kind") + + def test_to_void(self): + # But in general, we do consider these safe: + assert np.can_cast("d", "V") + assert np.can_cast("S20", "V") + + # Do not consider it a safe cast if the void is too smaller: + if uses_new_casts(): + assert not np.can_cast("d", "V1") + assert not np.can_cast("S20", "V1") + assert not np.can_cast("U1", "V1") + # Structured to unstructured is just like any other: + assert np.can_cast("d,i", "V", casting="same_kind") + else: + assert np.can_cast("d", "V1") + assert np.can_cast("S20", "V1") + assert np.can_cast("U1", "V1") + assert not np.can_cast("d,i", "V", casting="same_kind") + + +class TestCasting: + @pytest.mark.parametrize("from_Dt", simple_dtypes) + def test_simple_cancast(self, from_Dt): + for to_Dt in simple_dtypes: + cast = get_castingimpl(from_Dt, to_Dt) + + for from_dt in 
[from_Dt(), from_Dt().newbyteorder()]: + default = cast._resolve_descriptors((from_dt, None))[1][1] + assert default == to_Dt() + del default + + for to_dt in [to_Dt(), to_Dt().newbyteorder()]: + casting, (from_res, to_res) = cast._resolve_descriptors( + (from_dt, to_dt)) + assert(type(from_res) == from_Dt) + assert(type(to_res) == to_Dt) + if casting & Casting.cast_is_view: + # If a view is acceptable, this is "no" casting + # and byte order must be matching. + assert casting == Casting.no | Casting.cast_is_view + # The above table lists this as "equivalent" + assert Casting.equiv == CAST_TABLE[from_Dt][to_Dt] + # Note that to_res may not be the same as from_dt + assert from_res.isnative == to_res.isnative + else: + if from_Dt == to_Dt: + # Note that to_res may not be the same as from_dt + assert from_res.isnative != to_res.isnative + assert casting == CAST_TABLE[from_Dt][to_Dt] + + if from_Dt is to_Dt: + assert(from_dt is from_res) + assert(to_dt is to_res) + + + def string_with_modified_length(self, dtype, change_length): + fact = 1 if dtype.char == "S" else 4 + length = dtype.itemsize // fact + change_length + return np.dtype(f"{dtype.byteorder}{dtype.char}{length}") + + @pytest.mark.parametrize("other_DT", simple_dtypes) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_cancast(self, other_DT, string_char): + fact = 1 if string_char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(other_DT, string_DT) + + other_dt = other_DT() + expected_length = get_expected_stringlength(other_dt) + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert safety == Casting.safe # we consider to string casts "safe" + assert isinstance(res_dt, string_DT) + + # These casts currently implement changing the string length, so + # check the cast-safety for too long/fixed string lengths: + 
for change_length in [-1, 0, 1]: + if change_length >= 0: + expected_safety = Casting.safe + else: + expected_safety = Casting.same_kind + + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + assert res_dt is to_dt + assert safety == expected_safety + + # The opposite direction is always considered unsafe: + cast = get_castingimpl(string_DT, other_DT) + + safety, _ = cast._resolve_descriptors((string_dt, other_dt)) + assert safety == Casting.unsafe + + cast = get_castingimpl(string_DT, other_DT) + safety, (_, res_dt) = cast._resolve_descriptors((string_dt, None)) + assert safety == Casting.unsafe + assert other_dt is res_dt # returns the singleton for simple dtypes + + @pytest.mark.parametrize("other_dt", ["S8", "<U8", ">U8"]) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_to_string_cancast(self, other_dt, string_char): + other_dt = np.dtype(other_dt) + + fact = 1 if string_char == "S" else 4 + div = 1 if other_dt.char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(type(other_dt), string_DT) + + expected_length = other_dt.itemsize // div + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert isinstance(res_dt, string_DT) + + if other_dt.char == string_char: + if other_dt.isnative: + expected_safety = Casting.no | Casting.cast_is_view + else: + expected_safety = Casting.equiv + elif string_char == "U": + expected_safety = Casting.safe + else: + expected_safety = Casting.unsafe + + assert expected_safety == safety + + for change_length in [-1, 0, 1]: + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + + assert res_dt is to_dt + if expected_safety == Casting.unsafe: + assert safety == 
expected_safety + elif change_length < 0: + assert safety == Casting.same_kind + elif change_length == 0: + assert safety == expected_safety + elif change_length > 0: + assert safety == Casting.safe + + def test_void_to_string_special_case(self): + # Cover a small special case in void to string casting that could + # probably just as well be turned into an error (compare + # `test_object_to_parametric_internal_error` below). + assert np.array([], dtype="V5").astype("S").dtype.itemsize == 5 + assert np.array([], dtype="V5").astype("U").dtype.itemsize == 4 * 5 + + def test_object_to_parametric_internal_error(self): + # We reject casting from object to a parametric type, without + # figuring out the correct instance first. + object_dtype = type(np.dtype(object)) + other_dtype = type(np.dtype(str)) + cast = get_castingimpl(object_dtype, other_dtype) + with pytest.raises(TypeError, + match="casting from object to the parametric DType"): + cast._resolve_descriptors((np.dtype("O"), None)) diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 380b78f67..a67fe62c3 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -771,3 +771,17 @@ class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): np.array(arr, dtype="(2,2)f") self.assert_deprecated(check) + + +class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase): + # Deprecated 2020-11-24, NumPy 1.20 + """ + Technically, it should be impossible to create numpy object scalars, + but there was an unpickle path that would in theory allow it. That + path is invalid and must lead to the warning. + """ + message = "Unpickling a scalar with object dtype is deprecated." 
+ + def test_deprecated(self): + ctor = np.core.multiarray.scalar + self.assert_deprecated(lambda: ctor(np.dtype("O"), 1)) diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 1b2b85cc1..0ebcc72da 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -153,6 +153,9 @@ class TestBuiltin: 'formats': ['f4', 'i4'], 'offsets': [4, 0]}) assert_equal(x == y, False) + # But it is currently an equivalent cast: + assert np.can_cast(x, y, casting="equiv") + class TestRecord: def test_equivalent_record(self): diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 61806f99f..12306cbb8 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -7526,6 +7526,25 @@ class TestNewBufferProtocol: f.a = 3 assert_equal(arr['a'], 3) + @pytest.mark.parametrize("obj", [np.ones(3), np.ones(1, dtype="i,i")[()]]) + def test_error_if_stored_buffer_info_is_corrupted(self, obj): + """ + If a user extends a NumPy array before 1.20 and then runs it + on NumPy 1.20+. A C-subclassed array might in theory modify + the new buffer-info field. This checks that an error is raised + if this happens (for buffer export), an error is written on delete. + This is a sanity check to help users transition to safe code, it + may be deleted at any point. 
+ """ + # corrupt buffer info: + _multiarray_tests.corrupt_or_fix_bufferinfo(obj) + name = type(obj) + with pytest.raises(RuntimeError, + match=f".*{name} appears to be C subclassed"): + memoryview(obj) + # Fix buffer info again before we delete (or we lose the memory) + _multiarray_tests.corrupt_or_fix_bufferinfo(obj) + class TestArrayAttributeDeletion: diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index f5428f98c..866a96e31 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -922,6 +922,25 @@ class TestTypes: assert_equal(np.promote_types('u8', 'S1'), np.dtype('S20')) assert_equal(np.promote_types('u8', 'S30'), np.dtype('S30')) + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V6"), np.dtype("V10")], + [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])], + [np.dtype("i8,i8"), np.dtype("i4,i4")], + ]) + def test_invalid_void_promotion(self, dtype1, dtype2): + # Mainly test structured void promotion, which currently allows + # byte-swapping, but nothing else: + with pytest.raises(TypeError): + np.promote_types(dtype1, dtype2) + + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V10"), np.dtype("V10")], + [np.dtype([("name1", "<i8")]), np.dtype([("name1", ">i8")])], + [np.dtype("i8,i8"), np.dtype("i8,>i8")], + ]) + def test_valid_void_promotion(self, dtype1, dtype2): + assert np.promote_types(dtype1, dtype2) is dtype1 + @pytest.mark.parametrize("dtype", list(np.typecodes["All"]) + ["i,i", "S3", "S100", "U3", "U100", rational]) diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index f28ad5ac9..4d4b4b515 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -424,7 +424,16 @@ class TestRecord: # make sure we did not pickle the address assert not isinstance(obj, bytes) - assert_raises(TypeError, ctor, dtype, 13) + assert_raises(RuntimeError, ctor, dtype, 13) + + # Test roundtrip: + dump = 
pickle.dumps(a[0]) + unpickled = pickle.loads(dump) + assert a[0] == unpickled + + # Also check the similar (impossible) "object scalar" path: + with pytest.warns(DeprecationWarning): + assert ctor(np.dtype("O"), data) is data def test_objview_record(self): # https://github.com/numpy/numpy/issues/2599 diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index c7f44cf50..d8529418e 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -276,6 +276,10 @@ class TestModulus: # Check nans, inf with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in remainder") + sup.filter(RuntimeWarning, "divide by zero encountered in remainder") + sup.filter(RuntimeWarning, "divide by zero encountered in floor_divide") + sup.filter(RuntimeWarning, "divide by zero encountered in divmod") + sup.filter(RuntimeWarning, "invalid value encountered in divmod") for dt in np.typecodes['Float']: fone = np.array(1.0, dtype=dt) fzer = np.array(0.0, dtype=dt) @@ -290,6 +294,9 @@ class TestModulus: assert_(np.isnan(rem), 'dt: %s' % dt) rem = operator.mod(finf, fone) assert_(np.isnan(rem), 'dt: %s' % dt) + for op in [floordiv_and_mod, divmod]: + div, mod = op(fone, fzer) + assert_(np.isinf(div)) and assert_(np.isnan(mod)) def test_inplace_floordiv_handling(self): # issue gh-12927 diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index f57493e9c..3f89cc59b 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -13,7 +13,7 @@ from numpy.testing import ( assert_, assert_equal, assert_raises, assert_raises_regex, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_array_max_ulp, assert_allclose, assert_no_warnings, suppress_warnings, - _gen_alignment_data, assert_array_almost_equal_nulp + _gen_alignment_data, assert_array_almost_equal_nulp, assert_warns ) def on_powerpc(): @@ -293,6 +293,42 @@ class TestDivision: 
assert_equal(np.signbit(x//1), 0) assert_equal(np.signbit((-x)//1), 1) + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_floor_division_errors(self, dtype): + fnan = np.array(np.nan, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + fzer = np.array(0.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + # divide by zero error check + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.floor_divide, fone, fzer) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.floor_divide, fnan, fone) + assert_raises(FloatingPointError, np.floor_divide, fone, fnan) + assert_raises(FloatingPointError, np.floor_divide, fnan, fzer) + + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_floor_division_corner_cases(self, dtype): + # test corner cases like 1.0//0.0 for errors and return vals + x = np.zeros(10, dtype=dtype) + y = np.ones(10, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + fzer = np.array(0.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in floor_divide") + div = np.floor_divide(fnan, fone) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + div = np.floor_divide(fone, fnan) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + div = np.floor_divide(fnan, fzer) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + # verify 1.0//0.0 computations return inf + with np.errstate(divide='ignore'): + z = np.floor_divide(y, x) + assert_(np.isinf(z).all()) + def floor_divide_and_remainder(x, y): return (np.floor_divide(x, y), np.remainder(x, y)) @@ -366,9 +402,90 @@ class TestRemainder: else: assert_(b > rem >= 0, msg) + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_float_divmod_errors(self, dtype): + # Check valid errors raised for divmod and remainder + fzero = np.array(0.0, dtype=dtype) + fone = 
np.array(1.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + # since divmod is combination of both remainder and divide + # ops it will set both dividebyzero and invalid flags + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, fone, fzero) + with np.errstate(divide='ignore', invalid='raise'): + assert_raises(FloatingPointError, np.divmod, fone, fzero) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.divmod, fzero, fzero) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.divmod, finf, finf) + with np.errstate(divide='ignore', invalid='raise'): + assert_raises(FloatingPointError, np.divmod, finf, fzero) + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, finf, fzero) + + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + @pytest.mark.parametrize('fn', [np.fmod, np.remainder]) + def test_float_remainder_errors(self, dtype, fn): + fzero = np.array(0.0, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, fn, fone, fzero) + assert_raises(FloatingPointError, fn, fnan, fzero) + assert_raises(FloatingPointError, fn, fone, fnan) + assert_raises(FloatingPointError, fn, fnan, fone) + + def test_float_remainder_overflow(self): + a = np.finfo(np.float64).tiny + with np.errstate(over='ignore', invalid='ignore'): + div, mod = np.divmod(4, a) + np.isinf(div) + assert_(mod == 0) + with np.errstate(over='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, 4, a) + with np.errstate(invalid='raise', over='ignore'): + assert_raises(FloatingPointError, np.divmod, 4, a) + + def test_float_divmod_corner_cases(self): + # check nan cases + for dt in np.typecodes['Float']: + fnan = np.array(np.nan, dtype=dt) + fone = 
np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + finf = np.array(np.inf, dtype=dt) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in divmod") + sup.filter(RuntimeWarning, "divide by zero encountered in divmod") + div, rem = np.divmod(fone, fzer) + assert(np.isinf(div)), 'dt: %s, div: %s' % (dt, rem) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(fzer, fzer) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + assert_(np.isnan(div)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(finf, finf) + assert(np.isnan(div)), 'dt: %s, rem: %s' % (dt, rem) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(finf, fzer) + assert(np.isinf(div)), 'dt: %s, rem: %s' % (dt, rem) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(fnan, fone) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, rem: %s" % (dt, rem) + div, rem = np.divmod(fone, fnan) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, rem: %s" % (dt, rem) + div, rem = np.divmod(fnan, fzer) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, rem: %s" % (dt, rem) + def test_float_remainder_corner_cases(self): # Check remainder magnitude. 
for dt in np.typecodes['Float']: + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + fnan = np.array(np.nan, dtype=dt) b = np.array(1.0, dtype=dt) a = np.nextafter(np.array(0.0, dtype=dt), -b) rem = np.remainder(a, b) @@ -379,6 +496,7 @@ class TestRemainder: # Check nans, inf with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in remainder") + sup.filter(RuntimeWarning, "invalid value encountered in fmod") for dt in np.typecodes['Float']: fone = np.array(1.0, dtype=dt) fzer = np.array(0.0, dtype=dt) @@ -389,10 +507,30 @@ class TestRemainder: # MSVC 2008 returns NaN here, so disable the check. #rem = np.remainder(fone, finf) #assert_(rem == fone, 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(finf, fone) + fmod = np.fmod(finf, fone) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(finf, finf) + fmod = np.fmod(finf, fone) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + rem = np.remainder(finf, fzer) + fmod = np.fmod(finf, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) rem = np.remainder(fone, fnan) + fmod = np.fmod(fone, fnan) assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) - rem = np.remainder(finf, fone) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + rem = np.remainder(fnan, fzer) + fmod = np.fmod(fnan, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, rem)) + rem = np.remainder(fnan, fone) + fmod = np.fmod(fnan, fone) assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, rem)) class TestCbrt: diff --git a/numpy/distutils/unixccompiler.py b/numpy/distutils/unixccompiler.py index 9bb7251d8..0cd2d243e 100644 --- a/numpy/distutils/unixccompiler.py +++ 
b/numpy/distutils/unixccompiler.py @@ -3,6 +3,8 @@ unixccompiler - can handle very long argument lists for ar. """ import os +import sys +import subprocess from distutils.errors import CompileError, DistutilsExecError, LibError from distutils.unixccompiler import UnixCCompiler @@ -56,6 +58,11 @@ def UnixCCompiler__compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts # add commandline flags to dependency file if deps: + # After running the compiler, the file created will be in EBCDIC + # but will not be tagged as such. This tags it so the file does not + # have multiple different encodings being written to it + if sys.platform == 'zos': + subprocess.check_output(['chtag', '-tc', 'IBM1047', obj + '.d']) with open(obj + '.d', 'a') as f: f.write(_commandline_dep_string(cc_args, extra_postargs, pp_opts)) diff --git a/numpy/f2py/__version__.py b/numpy/f2py/__version__.py index 104c2e1a8..e20d7c1db 100644 --- a/numpy/f2py/__version__.py +++ b/numpy/f2py/__version__.py @@ -1,8 +1 @@ -major = 2 - -try: - from __svn_version__ import version - version_info = (major, version) - version = '%s_%s' % version_info -except (ImportError, ValueError): - version = str(major) +from numpy.version import version diff --git a/numpy/f2py/capi_maps.py b/numpy/f2py/capi_maps.py index fabbfc4c2..472ddde43 100644 --- a/numpy/f2py/capi_maps.py +++ b/numpy/f2py/capi_maps.py @@ -11,8 +11,6 @@ $Date: 2005/05/06 10:57:33 $ Pearu Peterson """ -__version__ = "$Revision: 1.60 $"[10:-1] - from . import __version__ f2py_version = __version__.version diff --git a/numpy/f2py/common_rules.py b/numpy/f2py/common_rules.py index 90483e55b..937d8bc72 100644 --- a/numpy/f2py/common_rules.py +++ b/numpy/f2py/common_rules.py @@ -13,8 +13,6 @@ $Date: 2005/05/06 10:57:33 $ Pearu Peterson """ -__version__ = "$Revision: 1.19 $"[10:-1] - from . 
import __version__ f2py_version = __version__.version diff --git a/numpy/f2py/f2py2e.py b/numpy/f2py/f2py2e.py index be2c345d1..b45d985aa 100755 --- a/numpy/f2py/f2py2e.py +++ b/numpy/f2py/f2py2e.py @@ -29,18 +29,14 @@ from . import __version__ from . import capi_maps f2py_version = __version__.version +numpy_version = __version__.version errmess = sys.stderr.write # outmess=sys.stdout.write show = pprint.pprint outmess = auxfuncs.outmess -try: - from numpy import __version__ as numpy_version -except ImportError: - numpy_version = 'N/A' - -__usage__ = """\ -Usage: +__usage__ =\ +f"""Usage: 1) To construct extension module sources: @@ -97,8 +93,8 @@ Options: --[no-]latex-doc Create (or not) <modulename>module.tex. Default is --no-latex-doc. --short-latex Create 'incomplete' LaTeX document (without commands - \\documentclass, \\tableofcontents, and \\begin{document}, - \\end{document}). + \\documentclass, \\tableofcontents, and \\begin{{document}}, + \\end{{document}}). --[no-]rest-doc Create (or not) <modulename>module.rst. Default is --no-rest-doc. @@ -167,12 +163,12 @@ Extra options (only effective with -c): array. Integer <int> sets the threshold for array sizes when a message should be shown. -Version: %s -numpy Version: %s +Version: {f2py_version} +numpy Version: {numpy_version} Requires: Python 3.5 or higher. License: NumPy license (see LICENSE.txt in the NumPy source code) Copyright 1999 - 2011 Pearu Peterson all rights reserved. -http://cens.ioc.ee/projects/f2py2e/""" % (f2py_version, numpy_version) +http://cens.ioc.ee/projects/f2py2e/""" def scaninputline(inputline): diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py index a14f60194..f1490527e 100755 --- a/numpy/f2py/rules.py +++ b/numpy/f2py/rules.py @@ -50,18 +50,15 @@ $Date: 2005/08/30 08:58:42 $ Pearu Peterson """ -__version__ = "$Revision: 1.129 $"[10:-1] - -from . import __version__ -f2py_version = __version__.version - -from .. 
import version as _numpy_version -numpy_version = _numpy_version.version - import os import time import copy +# __version__.version is now the same as the NumPy version +from . import __version__ +f2py_version = __version__.version +numpy_version = __version__.version + from .auxfuncs import ( applyrules, debugcapi, dictappend, errmess, gentitle, getargs2, hascallstatement, hasexternals, hasinitvalue, hasnote, hasresultnote, @@ -202,7 +199,7 @@ PyMODINIT_FUNC PyInit_#modulename#(void) { \tif (PyErr_Occurred()) \t\t{PyErr_SetString(PyExc_ImportError, \"can't initialize module #modulename# (failed to import numpy)\"); return m;} \td = PyModule_GetDict(m); -\ts = PyUnicode_FromString(\"$R""" + """evision: $\"); +\ts = PyUnicode_FromString(\"#f2py_version#\"); \tPyDict_SetItemString(d, \"__version__\", s); \tPy_DECREF(s); \ts = PyUnicode_FromString( diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 984f3086e..696fe617b 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1290,7 +1290,7 @@ def _interp_dispatcher(x, xp, fp, left=None, right=None, period=None): @array_function_dispatch(_interp_dispatcher) def interp(x, xp, fp, left=None, right=None, period=None): """ - One-dimensional linear interpolation. + One-dimensional linear interpolation for monotonically increasing sample points. Returns the one-dimensional piecewise linear interpolant to a function with given discrete data points (`xp`, `fp`), evaluated at `x`. @@ -1337,8 +1337,8 @@ def interp(x, xp, fp, left=None, right=None, period=None): -------- scipy.interpolate - Notes - ----- + Warnings + -------- The x-coordinate sequence is expected to be increasing, but this is not explicitly enforced. However, if the sequence `xp` is non-increasing, interpolation results are meaningless. 
diff --git a/numpy/testing/print_coercion_tables.py b/numpy/testing/print_coercion_tables.py index 8024df128..3a447cd2d 100755 --- a/numpy/testing/print_coercion_tables.py +++ b/numpy/testing/print_coercion_tables.py @@ -3,6 +3,7 @@ """ import numpy as np +from collections import namedtuple # Generic object that can be added, but doesn't do anything else class GenericObject: @@ -25,7 +26,17 @@ def print_cancast_table(ntypes): for row in ntypes: print(row, end=' ') for col in ntypes: - print(int(np.can_cast(row, col)), end=' ') + if np.can_cast(row, col, "equiv"): + cast = "#" + elif np.can_cast(row, col, "safe"): + cast = "=" + elif np.can_cast(row, col, "same_kind"): + cast = "~" + elif np.can_cast(row, col, "unsafe"): + cast = "." + else: + cast = " " + print(cast, end=' ') print() def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray, use_promote_types=False): @@ -69,6 +80,101 @@ def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray, print() +def print_new_cast_table(*, can_cast=True, legacy=False, flags=False): + """Prints new casts, the values given are default "can-cast" values, not + actual ones. + """ + from numpy.core._multiarray_tests import get_all_cast_information + + cast_table = { + 0 : "#", # No cast (classify as equivalent here) + 1 : "#", # equivalent casting + 2 : "=", # safe casting + 3 : "~", # same-kind casting + 4 : ".", # unsafe casting + } + flags_table = { + 0 : "▗", 7: "█", + 1: "▚", 2: "▐", 4: "▄", + 3: "▜", 5: "▙", + 6: "▟", + } + + cast_info = namedtuple("cast_info", ["can_cast", "legacy", "flags"]) + no_cast_info = cast_info(" ", " ", " ") + + casts = get_all_cast_information() + table = {} + dtypes = set() + for cast in casts: + dtypes.add(cast["from"]) + dtypes.add(cast["to"]) + + if cast["from"] not in table: + table[cast["from"]] = {} + to_dict = table[cast["from"]] + + can_cast = cast_table[cast["casting"]] + legacy = "L" if cast["legacy"] else "." 
+ flags = 0 + if cast["requires_pyapi"]: + flags |= 1 + if cast["supports_unaligned"]: + flags |= 2 + if cast["no_floatingpoint_errors"]: + flags |= 4 + + flags = flags_table[flags] + to_dict[cast["to"]] = cast_info(can_cast=can_cast, legacy=legacy, flags=flags) + + # The np.dtype(x.type) is a bit strange, because dtype classes do + # not expose much yet. + types = np.typecodes["All"] + def sorter(x): + # This is a bit weird hack, to get a table as close as possible to + # the one printing all typecodes (but expecting user-dtypes). + dtype = np.dtype(x.type) + try: + indx = types.index(dtype.char) + except ValueError: + indx = np.inf + return (indx, dtype.char) + + dtypes = sorted(dtypes, key=sorter) + + def print_table(field="can_cast"): + print('X', end=' ') + for dt in dtypes: + print(np.dtype(dt.type).char, end=' ') + print() + for from_dt in dtypes: + print(np.dtype(from_dt.type).char, end=' ') + row = table.get(from_dt, {}) + for to_dt in dtypes: + print(getattr(row.get(to_dt, no_cast_info), field), end=' ') + print() + + if can_cast: + # Print the actual table: + print() + print("Casting: # is equivalent, = is safe, ~ is same-kind, and . is unsafe") + print() + print_table("can_cast") + + if legacy: + print() + print("L denotes a legacy cast . 
a non-legacy one.") + print() + print_table("legacy") + + if flags: + print() + print(f"{flags_table[0]}: no flags, {flags_table[1]}: PyAPI, " + f"{flags_table[2]}: supports unaligned, {flags_table[4]}: no-float-errors") + print() + print_table("flags") + + if __name__ == '__main__': print("can cast") print_cancast_table(np.typecodes['All']) @@ -89,3 +195,5 @@ if __name__ == '__main__': print() print("promote_types") print_coercion_table(np.typecodes['All'], 0, 0, False, True) + print("New casting type promotion:") + print_new_cast_table(can_cast=True, legacy=True, flags=True) diff --git a/numpy/tests/test_scripts.py b/numpy/tests/test_scripts.py index a0f2ba70a..e67a82947 100644 --- a/numpy/tests/test_scripts.py +++ b/numpy/tests/test_scripts.py @@ -38,9 +38,9 @@ def find_f2py_commands(): def test_f2py(f2py_cmd): # test that we can run f2py script stdout = subprocess.check_output([f2py_cmd, '-v']) - assert_equal(stdout.strip(), b'2') + assert_equal(stdout.strip(), np.__version__.encode('ascii')) def test_pep338(): stdout = subprocess.check_output([sys.executable, '-mnumpy.f2py', '-v']) - assert_equal(stdout.strip(), b'2') + assert_equal(stdout.strip(), np.__version__.encode('ascii')) diff --git a/numpy/typing/__init__.py b/numpy/typing/__init__.py index a9bf94f13..e72e8fb4d 100644 --- a/numpy/typing/__init__.py +++ b/numpy/typing/__init__.py @@ -120,7 +120,7 @@ API # NOTE: The API section will be appended with additional entries # further down in this file -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, List if TYPE_CHECKING: import sys @@ -131,6 +131,17 @@ if TYPE_CHECKING: else: def final(f): return f +if not TYPE_CHECKING: + __all__ = ["ArrayLike", "DTypeLike", "NBitBase"] +else: + # Ensure that all objects within this module are accessible while + # static type checking. This includes private ones, as we need them + # for internal use. 
+ # + # Declare to mypy that `__all__` is a list of strings without assigning + # an explicit value + __all__: List[str] + @final # Dissallow the creation of arbitrary `NBitBase` subclasses class NBitBase: @@ -194,7 +205,7 @@ class _16Bit(_32Bit): ... # type: ignore[misc] class _8Bit(_16Bit): ... # type: ignore[misc] # Clean up the namespace -del TYPE_CHECKING, final +del TYPE_CHECKING, final, List from ._scalars import ( _CharLike, @@ -213,7 +224,7 @@ from ._dtype_like import _SupportsDType, _VoidDTypeLike, DTypeLike if __doc__ is not None: from ._add_docstring import _docstrings __doc__ += _docstrings - __doc__ += f'\n.. autoclass:: numpy.typing.NBitBase\n' + __doc__ += '\n.. autoclass:: numpy.typing.NBitBase\n' del _docstrings from numpy._pytesttester import PytestTester |
