6 files changed, 307 insertions, 90 deletions
diff --git a/numpy/core/include/numpy/experimental_dtype_api.h b/numpy/core/include/numpy/experimental_dtype_api.h
index 554c7fb6c..effa66baf 100644
--- a/numpy/core/include/numpy/experimental_dtype_api.h
+++ b/numpy/core/include/numpy/experimental_dtype_api.h
@@ -181,6 +181,12 @@ typedef PyObject *_ufunc_addloop_fromspec_func(
 /*
  * Type of the C promoter function, which must be wrapped into a
  * PyCapsule with name "numpy._ufunc_promoter".
+ *
+ * Note that currently the output dtypes are always NULL unless they are
+ * also part of the signature.  This is an implementation detail and could
+ * change in the future.  However, in general promoters should not have a
+ * need for output dtypes.
+ * (There are potential use-cases, these are currently unsupported.)
  */
 typedef int promoter_function(PyObject *ufunc,
         PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
index 8e99c0420..772e46d64 100644
--- a/numpy/core/src/umath/dispatching.c
+++ b/numpy/core/src/umath/dispatching.c
@@ -46,6 +46,7 @@
 
 #include "dispatching.h"
 #include "dtypemeta.h"
+#include "common_dtype.h"
 #include "npy_hashtable.h"
 #include "legacy_array_method.h"
 #include "ufunc_object.h"
@@ -147,6 +148,23 @@ PyUFunc_AddLoop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate)
  * (Based on `isinstance()`, the knowledge that non-abstract DTypes cannot
  * be subclassed is used, however.)
  *
+ * NOTE: This currently does not take into account output dtypes which do not
+ *       have to match.  The possible extension here is that if an output
+ *       is given (and thus an output dtype), but not part of the signature
+ *       we could ignore it for matching, but *prefer* a loop that matches
+ *       better.
+ *       Why is this not done currently?  First, it seems a niche feature that
+ *       loops can only be distinguished based on the output dtype.  Second,
+ *       there are some nasty theoretical things because:
+ *
+ *            np.add(f4, f4, out=f8)
+ *            np.add(f4, f4, out=f8, dtype=f8)
+ *
+ *       are different, the first uses the f4 loop, the second the f8 loop.
+ *       The problem is, that the current cache only uses the op_dtypes and
+ *       both are `(f4, f4, f8)`.  The cache would need to store also which
+ *       output was provided by `dtype=`/`signature=`.
+ *
  * @param ufunc
  * @param op_dtypes The DTypes that are either passed in (defined by an
  *        operand) or defined by the `signature` as also passed in as
@@ -159,7 +177,8 @@ PyUFunc_AddLoop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate)
  */
 static int
 resolve_implementation_info(PyUFuncObject *ufunc,
-        PyArray_DTypeMeta *op_dtypes[], PyObject **out_info)
+        PyArray_DTypeMeta *op_dtypes[], npy_bool only_promoters,
+        PyObject **out_info)
 {
     int nin = ufunc->nin, nargs = ufunc->nargs;
     Py_ssize_t size = PySequence_Length(ufunc->_loops);
@@ -170,6 +189,12 @@ resolve_implementation_info(PyUFuncObject *ufunc,
         /* Test all resolvers  */
         PyObject *resolver_info = PySequence_Fast_GET_ITEM(
                 ufunc->_loops, res_idx);
+
+        if (only_promoters && PyObject_TypeCheck(
+                    PyTuple_GET_ITEM(resolver_info, 1), &PyArrayMethod_Type)) {
+            continue;
+        }
+
         PyObject *curr_dtypes = PyTuple_GET_ITEM(resolver_info, 0);
         /*
          * Test if the current resolver matches, it could make sense to
@@ -179,20 +204,31 @@ resolve_implementation_info(PyUFuncObject *ufunc,
 
         npy_bool matches = NPY_TRUE;
         /*
-         * NOTE: We check also the output DType.  In principle we do not
-         *       have to strictly match it (unless it is provided by the
-         *       `signature`).  This assumes that a (fallback) promoter will
-         *       unset the output DType if no exact match is found.
+         * NOTE: We currently match the output dtype exactly here, this is
+         *       actually only necessary if the signature includes it.
+         *       We rely on op_dtypes[nin:nargs] being NULLed otherwise.
          */
         for (Py_ssize_t i = 0; i < nargs; i++) {
             PyArray_DTypeMeta *given_dtype = op_dtypes[i];
             PyArray_DTypeMeta *resolver_dtype = (
                     (PyArray_DTypeMeta *)PyTuple_GET_ITEM(curr_dtypes, i));
             assert((PyObject *)given_dtype != Py_None);
-            if (given_dtype == NULL && i >= nin) {
-                /* Unspecified out always matches (see below for inputs) */
-                continue;
+            if (given_dtype == NULL) {
+                if (i >= nin) {
+                    /* Unspecified out always matches (see below for inputs) */
+                    continue;
+                }
+                /*
+                 * This is a reduce-like operation, which always has the form
+                 * `(res_DType, op_DType, res_DType)`.  If the first and last
+                 * dtype of the loops match, this should be reduce-compatible.
+                 */
+                if (PyTuple_GET_ITEM(curr_dtypes, 0)
+                        == PyTuple_GET_ITEM(curr_dtypes, 2)) {
+                    continue;
+                }
             }
+
             if (resolver_dtype == (PyArray_DTypeMeta *)Py_None) {
                 /* always matches */
                 continue;
@@ -204,24 +240,7 @@ resolve_implementation_info(PyUFuncObject *ufunc,
                 matches = NPY_FALSE;
                 break;
             }
-            if (given_dtype == NULL) {
-                /*
-                 * If an input was not specified, this is a reduce-like
-                 * operation: reductions use `(operand_DType, NULL, out_DType)`
-                 * as they only have a single operand.  This allows special
-                 * reduce promotion rules useful for example for sum/product.
-                 * E.g. `np.add.reduce([True, True])` promotes to integer.
-                 *
-                 * Continuing here allows a promoter to handle reduce-like
-                 * promotions explicitly if necessary.
-                 * TODO: The `!NPY_DT_is_abstract(resolver_dtype)` currently
-                 *       ensures that this is a promoter.  If we allow
-                 *       `ArrayMethods` to use abstract DTypes, we may have to
-                 *       reject it here or the `ArrayMethod` has to implement
-                 *       the reduce promotion.
-                 */
-                continue;
-            }
+
             int subclass = PyObject_IsSubclass(
                     (PyObject *)given_dtype, (PyObject *)resolver_dtype);
             if (subclass < 0) {
@@ -254,8 +273,12 @@ resolve_implementation_info(PyUFuncObject *ufunc,
              * In all cases, we give up resolution, since it would be
              * necessary to compare to two "best" cases.
              */
-            int unambiguously_equally_good = 1;
             for (Py_ssize_t i = 0; i < nargs; i++) {
+                if (i == ufunc->nin && current_best != -1) {
+                    /* inputs prefer one loop and outputs have lower priority */
+                    break;
+                }
+
                 int best;
 
                 PyObject *prev_dtype = PyTuple_GET_ITEM(best_dtypes, i);
@@ -265,50 +288,18 @@ resolve_implementation_info(PyUFuncObject *ufunc,
                     /* equivalent, so this entry does not matter */
                     continue;
                 }
-                /*
-                 * TODO: Even if the input is not specified, if we have
-                 *       abstract DTypes and one is a subclass of the other,
-                 *       the subclass should be considered a better match
-                 *       (subclasses are always more specific).
-                 */
-                /* Whether this (normally output) dtype was specified at all */
                 if (op_dtypes[i] == NULL) {
                     /*
-                     * When DType is completely unspecified, prefer abstract
-                     * over concrete, assuming it will resolve.
-                     * Furthermore, we cannot decide which abstract/None
-                     * is "better", only concrete ones which are subclasses
-                     * of Abstract ones are defined as worse.
+                     * If a dtype is NULL it always matches, so there is no
+                     * point in defining one as more precise than the other.
                      */
-                    npy_bool prev_is_concrete = NPY_FALSE;
-                    npy_bool new_is_concrete = NPY_FALSE;
-                    if ((prev_dtype != Py_None) &&
-                            !NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) {
-                        prev_is_concrete = NPY_TRUE;
-                    }
-                    if ((new_dtype != Py_None) &&
-                            !NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) {
-                        new_is_concrete = NPY_TRUE;
-                    }
-                    if (prev_is_concrete == new_is_concrete) {
-                        best = -1;
-                    }
-                    else if (prev_is_concrete) {
-                        unambiguously_equally_good = 0;
-                        best = 1;
-                    }
-                    else {
-                        unambiguously_equally_good = 0;
-                        best = 0;
-                    }
+                    continue;
                 }
                 /* If either is None, the other is strictly more specific */
-                else if (prev_dtype == Py_None) {
-                    unambiguously_equally_good = 0;
+                if (prev_dtype == Py_None) {
                     best = 1;
                 }
                 else if (new_dtype == Py_None) {
-                    unambiguously_equally_good = 0;
                     best = 0;
                 }
                 /*
@@ -318,20 +309,25 @@ resolve_implementation_info(PyUFuncObject *ufunc,
                 else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype) &&
                          !NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) {
                     /*
-                     * Ambiguous unless the are identical (checked above),
-                     * but since they are concrete it does not matter which
-                     * best to compare.
+                     * Ambiguous unless they are identical (checked above),
+                     * or one matches exactly.
                      */
-                    best = -1;
+                    if (prev_dtype == (PyObject *)op_dtypes[i]) {
+                        best = 0;
+                    }
+                    else if (new_dtype == (PyObject *)op_dtypes[i]) {
+                        best = 1;
+                    }
+                    else {
+                        best = -1;
+                    }
                 }
                 else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) {
                     /* old is not abstract, so better (both not possible) */
-                    unambiguously_equally_good = 0;
                     best = 0;
                 }
                 else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) {
                     /* new is not abstract, so better (both not possible) */
-                    unambiguously_equally_good = 0;
                     best = 1;
                 }
                 /*
@@ -349,6 +345,10 @@ resolve_implementation_info(PyUFuncObject *ufunc,
                     return -1;
                 }
 
+                if (best == -1) {
+                    /* no new info, nothing to update */
+                    continue;
+                }
                 if ((current_best != -1) && (current_best != best)) {
                     /*
                      * We need a clear best, this could be tricky, unless
@@ -367,15 +367,34 @@ resolve_implementation_info(PyUFuncObject *ufunc,
 
             if (current_best == -1) {
                 /*
-                 * TODO: It would be nice to have a "diagnostic mode" that
-                 *       informs if this happens! (An immediate error currently
-                 *       blocks later legacy resolution, but may work in the
-                 *       future.)
+                 * We could not find a best loop, but promoters should be
+                 * designed in a way to disambiguate such scenarios, so we
+                 * retry the whole lookup using only promoters.
+                 * (There is a small chance we already got two promoters.
+                 * We just redo it anyway for simplicity.)
                  */
-                if (unambiguously_equally_good) {
-                    /* unset the best resolver to indicate this */
-                    best_resolver_info = NULL;
-                    continue;
+                if (!only_promoters) {
+                    return resolve_implementation_info(ufunc,
+                            op_dtypes, NPY_TRUE, out_info);
+                }
+                /*
+                 * If this is already the retry, we are out of luck.  Promoters
+                 * should be designed in a way that this cannot happen!
+                 * (It should be noted, that the retry might not find anything
+                 * and we still do a legacy lookup later.)
+                 */
+                PyObject *given = PyArray_TupleFromItems(
+                        ufunc->nargs, (PyObject **)op_dtypes, 1);
+                if (given != NULL) {
+                    PyErr_Format(PyExc_RuntimeError,
+                            "Could not find a loop for the inputs:\n    %S\n"
+                            "The two promoters %S and %S matched the input "
+                            "equally well.  Promoters must be designed "
+                            "to be unambiguous.  NOTE: This indicates an error "
+                            "in NumPy or an extending library and should be "
+                            "reported.",
+                            given, best_dtypes, curr_dtypes);
+                    Py_DECREF(given);
                 }
                 *out_info = NULL;
                 return 0;
@@ -648,7 +667,8 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
      * in `resolve_implementation_info`.
      */
     if (info == NULL) {
-        if (resolve_implementation_info(ufunc, op_dtypes, &info) < 0) {
+        if (resolve_implementation_info(ufunc,
+                op_dtypes, NPY_FALSE, &info) < 0) {
             return NULL;
         }
         if (info != NULL && PyObject_TypeCheck(
@@ -766,7 +786,7 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
         npy_bool force_legacy_promotion,
         npy_bool allow_legacy_promotion)
 {
-    int nargs = ufunc->nargs;
+    int nin = ufunc->nin, nargs = ufunc->nargs;
 
     /*
      * Get the actual DTypes we operate with by mixing the operand array
@@ -782,6 +802,15 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
             Py_XSETREF(op_dtypes[i], signature[i]);
             assert(i >= ufunc->nin || !NPY_DT_is_abstract(signature[i]));
         }
+        else if (i > nin) {
+            /*
+             * Outputs not in the signature are ignored: always correct, but
+             * limits loops that include the cast (see also the comment in
+             * resolve_implementation_info).  NOTE(review): `i > nin` skips
+             * the first output (i == nin); was `i >= nin` intended?  Confirm.
+             */
+            Py_CLEAR(op_dtypes[i]);
+        }
     }
 
     if (force_legacy_promotion) {
@@ -826,6 +855,112 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
 
 
 /*
+ * Generic promoter used as a final fallback on ufuncs.  Most operations are
+ * homogeneous: every input not fixed by `signature` gets the common DType
+ * (the homogeneous output signature if given, else the promoted inputs);
+ * outputs are copied from `op_dtypes`.  A reduction (op_dtypes[0] == NULL)
+ * uses op_dtypes[1] for all three.  Returns 0 on success, -1 otherwise.
+ */
+NPY_NO_EXPORT int
+default_ufunc_promoter(PyUFuncObject *ufunc,
+        PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
+        PyArray_DTypeMeta *new_op_dtypes[])
+{
+    if (ufunc->type_resolver == &PyUFunc_SimpleBinaryComparisonTypeResolver
+            && signature[0] == NULL && signature[1] == NULL
+            && signature[2] != NULL && signature[2]->type_num != NPY_BOOL) {
+        /* bail out, this is _only_ to give future/deprecation warning! */
+        return -1;
+    }
+
+    /* If nin < 2 promotion is a no-op, so it should not be registered */
+    assert(ufunc->nin > 1);
+    if (op_dtypes[0] == NULL) {
+        assert(ufunc->nin == 2 && ufunc->nout == 1);  /* must be reduction */
+        Py_INCREF(op_dtypes[1]);
+        new_op_dtypes[0] = op_dtypes[1];
+        Py_INCREF(op_dtypes[1]);
+        new_op_dtypes[1] = op_dtypes[1];
+        Py_INCREF(op_dtypes[1]);
+        new_op_dtypes[2] = op_dtypes[1];
+        return 0;
+    }
+    PyArray_DTypeMeta *common = NULL;
+    /*
+     * If a signature is used and homogeneous in its outputs use that
+     * (Could/should likely be rather applied to inputs also, although outs
+     * only could have some advantage and input dtypes are rarely enforced.)
+     */
+    for (int i = ufunc->nin; i < ufunc->nargs; i++) {
+        if (signature[i] != NULL) {
+            if (common == NULL) {
+                Py_INCREF(signature[i]);
+                common = signature[i];
+            }
+            else if (common != signature[i]) {
+                Py_CLEAR(common);  /* Not homogeneous, unset common */
+                break;
+            }
+        }
+    }
+    /* Otherwise, use the common DType of all input operands */
+    if (common == NULL) {
+        common = PyArray_PromoteDTypeSequence(ufunc->nin, op_dtypes);
+        if (common == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+                PyErr_Clear();  /* Do not propagate normal promotion errors */
+            }
+            return -1;
+        }
+    }
+
+    for (int i = 0; i < ufunc->nin; i++) {  /* outputs are filled below */
+        PyArray_DTypeMeta *tmp = common;
+        if (signature[i]) {
+            tmp = signature[i];  /* never replace a fixed one. */
+        }
+        Py_INCREF(tmp);
+        new_op_dtypes[i] = tmp;
+    }
+    for (int i = ufunc->nin; i < ufunc->nargs; i++) {
+        Py_XINCREF(op_dtypes[i]);
+        new_op_dtypes[i] = op_dtypes[i];
+    }
+
+    Py_DECREF(common);
+    return 0;
+}
+
+
+/*
+ * In some cases, we assume that there will only ever be object loops,
+ * and the object loop should *always* be chosen.
+ * (In those cases more specific loops should not really be registered, but
+ * we do not check that.)  Entries fixed by `signature` are left untouched
+ * in `new_op_dtypes`; all others are set to object.  Always returns 0.
+ * We default to this for "old-style" ufuncs which have exactly one loop
+ * consisting only of objects at registration time (numba mutates this
+ * later, but presumably that still holds).
+ */
+NPY_NO_EXPORT int
+object_only_ufunc_promoter(PyUFuncObject *ufunc,
+        PyArray_DTypeMeta *NPY_UNUSED(op_dtypes[]),
+        PyArray_DTypeMeta *signature[],
+        PyArray_DTypeMeta *new_op_dtypes[])
+{
+    PyArray_DTypeMeta *object_DType = PyArray_DTypeFromTypeNum(NPY_OBJECT);
+
+    for (int i = 0; i < ufunc->nargs; i++) {
+        if (signature[i] == NULL) {
+            Py_INCREF(object_DType);  /* one reference per filled slot */
+            new_op_dtypes[i] = object_DType;
+        }
+    }
+    Py_DECREF(object_DType);  /* release the reference held locally */
+    return 0;
+}
+
+/*
  * Special promoter for the logical ufuncs.  The logical ufuncs can always
  * use the ??->? and still get the correct output (as long as the output
  * is not supposed to be `object`).
@@ -843,6 +978,12 @@ logical_ufunc_promoter(PyUFuncObject *NPY_UNUSED(ufunc),
      */
     int force_object = 0;
 
+    if (signature[0] == NULL && signature[1] == NULL
+            && signature[2] != NULL && signature[2]->type_num != NPY_BOOL) {
+        /* bail out, this is _only_ to give future/deprecation warning! */
+        return -1;
+    }
+
     for (int i = 0; i < 3; i++) {
         PyArray_DTypeMeta *item;
         if (signature[i] != NULL) {
@@ -913,4 +1054,3 @@ install_logical_ufunc_promoter(PyObject *ufunc)
 
     return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
 }
-
diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h
index 2f314615d..305b0549f 100644
--- a/numpy/core/src/umath/dispatching.h
+++ b/numpy/core/src/umath/dispatching.h
@@ -27,6 +27,17 @@ add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc,
         PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate);
 
 NPY_NO_EXPORT int
+default_ufunc_promoter(PyUFuncObject *ufunc,
+        PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
+        PyArray_DTypeMeta *new_op_dtypes[]);
+
+NPY_NO_EXPORT int
+object_only_ufunc_promoter(PyUFuncObject *ufunc,
+        PyArray_DTypeMeta *NPY_UNUSED(op_dtypes[]),
+        PyArray_DTypeMeta *signature[],
+        PyArray_DTypeMeta *new_op_dtypes[]);
+
+NPY_NO_EXPORT int
 install_logical_ufunc_promoter(PyObject *ufunc);
 
 
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index bb951b0b6..5863a2b83 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2756,7 +2756,7 @@ reducelike_promote_and_resolve(PyUFuncObject *ufunc,
     }
 
     PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc,
-            ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE);
+            ops, signature, operation_DTypes, NPY_FALSE, NPY_FALSE);
     Py_DECREF(operation_DTypes[1]);
     if (out != NULL) {
         Py_DECREF(operation_DTypes[0]);
@@ -2786,8 +2786,10 @@ reducelike_promote_and_resolve(PyUFuncObject *ufunc,
     if (out_descrs[0] != out_descrs[2] || (
             enforce_uniform_args && out_descrs[0] != out_descrs[1])) {
         PyErr_Format(PyExc_TypeError,
-                "the resolved dtypes are not compatible with %s.%s",
-                ufunc_get_name_cstr(ufunc), method);
+                "the resolved dtypes are not compatible with %s.%s. "
+                "Resolved (%R, %R, %R)",
+                ufunc_get_name_cstr(ufunc), method,
+                out_descrs[0], out_descrs[1], out_descrs[2]);
         goto fail;
     }
     /* TODO: This really should _not_ be unsafe casting (same above)! */
@@ -5205,6 +5207,61 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
 
         info = add_and_return_legacy_wrapping_ufunc_loop(ufunc, op_dtypes, 1);
         if (info == NULL) {
+            Py_DECREF(ufunc);
+            return NULL;
+        }
+    }
+
+    PyObject *promoter = NULL;
+    if (ufunc->ntypes == 1) {
+        npy_bool all_object = NPY_TRUE;
+        for (int i = 0; i < ufunc->nargs; i++) {
+            if (ufunc->types[i] != NPY_OBJECT) {
+                all_object = NPY_FALSE;
+                break;
+            }
+        }
+        if (all_object) {
+            promoter = PyCapsule_New(&object_only_ufunc_promoter,
+                    "numpy._ufunc_promoter", NULL);
+            if (promoter == NULL) {
+                Py_DECREF(ufunc);
+                return NULL;
+            }
+        }
+    }
+    if (promoter == NULL && ufunc->nin > 1) {
+        promoter = PyCapsule_New(&default_ufunc_promoter,
+                "numpy._ufunc_promoter", NULL);
+        if (promoter == NULL) {
+            Py_DECREF(ufunc);
+            return NULL;
+        }
+    }
+    if (promoter != NULL) {
+        /* Install the selected promoter as a fallback matching any DTypes */
+        PyObject *dtype_tuple = PyTuple_New(ufunc->nargs);
+        if (dtype_tuple == NULL) {
+            Py_DECREF(promoter);
+            Py_DECREF(ufunc);
+            return NULL;
+        }
+        for (int i = 0; i < ufunc->nargs; i++) {
+            Py_INCREF(Py_None);
+            PyTuple_SET_ITEM(dtype_tuple, i, Py_None);
+        }
+        PyObject *info = PyTuple_Pack(2, dtype_tuple, promoter);
+        Py_DECREF(dtype_tuple);
+        Py_DECREF(promoter);
+        if (info == NULL) {
+            Py_DECREF(ufunc);
+            return NULL;
+        }
+
+        int res = PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
+        Py_DECREF(info);
+        if (res < 0) {
+            Py_DECREF(ufunc);
             return NULL;
         }
     }
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index b95d669a8..50da7b800 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -2033,15 +2033,15 @@ class TestDateTime:
         # subtracting two datetime64 works, but we cannot reduce it, since
         # the result of that subtraction will have a different dtype.
         arr = np.array(["2021-12-02", "2019-05-12"], dtype="M8[ms]")
-        msg = r"the resolved dtypes are not compatible with subtract\."
+        msg = r"ufunc 'subtract' did not contain a loop with signature "
 
-        with pytest.raises(TypeError, match=msg + "reduce"):
+        with pytest.raises(TypeError, match=msg):
             np.subtract.reduce(arr)
 
-        with pytest.raises(TypeError, match=msg + "accumulate"):
+        with pytest.raises(TypeError, match=msg):
             np.subtract.accumulate(arr)
 
-        with pytest.raises(TypeError, match=msg + "reduceat"):
+        with pytest.raises(TypeError, match=msg):
             np.subtract.reduceat(arr, [0])
 
     def test_datetime_busday_offset(self):
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 398bf88db..d2bbbc181 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -1762,12 +1762,15 @@ class TestUfunc:
         result = _rational_tests.test_add(a, b)
         assert_equal(result, target)
 
-        # But since we use the old type resolver, this may not work
-        # for dtype variations unless the output dtype is given:
+        # This works even more generally, so long as the default common-dtype
+        # promoter works out:
         result = _rational_tests.test_add(a, b.astype(np.uint16), out=c)
         assert_equal(result, target)
+
+        # But, it can be fooled, e.g. (use scalars, which forces legacy
+        # type resolution to kick in, which then fails):
         with assert_raises(TypeError):
-            _rational_tests.test_add(a, b.astype(np.uint16))
+            _rational_tests.test_add(a, np.uint16(2))
 
     def test_operand_flags(self):
         a = np.arange(16, dtype='l').reshape(4, 4)