7 files changed, 283 insertions, 88 deletions
diff --git a/doc/release/upcoming_changes/18880.compatibility.rst b/doc/release/upcoming_changes/18880.compatibility.rst
new file mode 100644
index 000000000..4951463cf
--- /dev/null
+++ b/doc/release/upcoming_changes/18880.compatibility.rst
@@ -0,0 +1,34 @@
+Ufunc ``signature=...`` and ``dtype=`` generalization and ``casting``
+---------------------------------------------------------------------
+The behaviour for ``np.ufunc(1.0, 1.0, signature=...)`` or
+``np.ufunc(1.0, 1.0, dtype=...)`` can now yield different loops in 1.21
+compared to 1.20 because of changes in promotion.
+When ``signature`` was previously used, the casting check on inputs
+was relaxed, which could lead to downcasting inputs unsafely, especially
+if combined with ``casting="unsafe"``.
+
+Casting is now guaranteed to be safe.  If a signature is only
+partially provided, for example using ``signature=("float64", None, None)``,
+this could lead to no loop being found (an error).
+In that case, it is necessary to provide the complete signature
+to enforce casting the inputs.
+If ``dtype="float64"`` is used or only outputs are set (e.g.
+``signature=(None, None, "float64")``) the behaviour is unchanged.
+We expect that very few users are affected by this change.
+
+Further, the meaning of ``dtype="float64"`` has been slightly modified and
+now strictly enforces only the correct output (and not input) DTypes.
+This means it is now always equivalent to::
+
+    signature=(None, None, "float64")
+
+(If the ufunc has two inputs and one output).  Since this could lead
+to no loop being found in some cases, NumPy will normally also search
+for the loop::
+
+    signature=("float64", "float64", "float64")
+
+if the first search failed.
+In the future, this behaviour may be customized to achieve the expected
+results for more complex ufuncs.  (For some universal functions such as
+``np.ldexp`` inputs can have different DTypes.)
diff --git a/doc/source/reference/ufuncs.rst b/doc/source/reference/ufuncs.rst
index c919ec9b8..27ebf8d1b 100644
--- a/doc/source/reference/ufuncs.rst
+++ b/doc/source/reference/ufuncs.rst
@@ -430,8 +430,10 @@ advanced usage and will not typically be used.
 
     .. versionadded:: 1.6
 
-    Overrides the dtype of the calculation and output arrays. Similar to
-    *signature*.
+    Overrides the DType of the output arrays the same way as the *signature*.
+    This should ensure a matching precision of the calculation.  The exact
+    calculation DTypes chosen may depend on the ufunc and the inputs may be
+    cast to this DType to perform the calculation.
 
 *subok*
 
@@ -442,20 +444,31 @@ advanced usage and will not typically be used.
 
 *signature*
 
-    Either a data-type, a tuple of data-types, or a special signature
-    string indicating the input and output types of a ufunc. This argument
-    allows you to provide a specific signature for the 1-d loop to use
-    in the underlying calculation. If the loop specified does not exist
-    for the ufunc, then a TypeError is raised. Normally, a suitable loop is
-    found automatically by comparing the input types with what is
-    available and searching for a loop with data-types to which all inputs
-    can be cast safely. This keyword argument lets you bypass that
-    search and choose a particular loop. A list of available signatures is
-    provided by the **types** attribute of the ufunc object. For backwards
-    compatibility this argument can also be provided as *sig*, although
-    the long form is preferred. Note that this should not be confused with
-    the generalized ufunc :ref:`signature <details-of-signature>` that is
-    stored in the **signature** attribute of the of the ufunc object.
+    Either a DType, a tuple of DTypes, or a special signature string
+    indicating the input and output types of a ufunc.
+
+    This argument allows the user to specify exact DTypes to be used for the
+    calculation.  Casting will be used as necessary. The actual DType of the
+    input arrays is not considered unless ``signature`` is ``None`` for
+    that array.
+
+    When all DTypes are fixed, a specific loop is chosen or an error raised
+    if no matching loop exists.
+    If some DTypes are not specified and left ``None``, the behaviour may
+    depend on the ufunc.
+    At this time, a list of available signatures is provided by the **types**
+    attribute of the ufunc.  (This list may be missing DTypes not defined
+    by NumPy.)
+
+    The ``signature`` only specifies the DType class/type.  For example, it
+    can specify that the operation should be a ``datetime64`` or ``float64``
+    operation.  It does not specify the ``datetime64`` time-unit or the
+    ``float64`` byte-order.
+
+    For backwards compatibility this argument can also be provided as *sig*,
+    although the long form is preferred.  Note that this should not be
+    confused with the generalized ufunc :ref:`signature <details-of-signature>`
+    that is stored in the **signature** attribute of the ufunc object.
 
 *extobj*
 
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 7dffb482f..0644a28c0 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -4542,10 +4542,15 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
                     "Cannot provide `dtype` when a ufunc has no outputs");
             return -1;
         }
-        signature[nin] = _get_dtype(dtype_obj);
-        if (signature[nin] == NULL) {
+        PyArray_DTypeMeta *dtype = _get_dtype(dtype_obj);
+        if (dtype == NULL) {
             return -1;
         }
+        for (int i = nin; i < nop; i++) {
+            Py_INCREF(dtype);
+            signature[i] = dtype;
+        }
+        Py_DECREF(dtype);
         res = _make_new_typetup(nop, signature, out_typetup);
         goto finish;
     }
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index a3f97a8f3..2834235e4 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -288,7 +288,7 @@ PyUFunc_DefaultTypeResolver(PyUFuncObject *ufunc,
     } else {
         /* Find the specified ufunc inner loop, and fill in the dtypes */
         retval = type_tuple_type_resolver(ufunc, type_tup,
-                        operands, casting, any_object, out_dtypes);
+                        operands, input_casting, casting, any_object, out_dtypes);
     }
 
     return retval;
@@ -558,6 +558,11 @@ PyUFunc_SimpleUniformOperationTypeResolver(
          * This is a fast-path, since all descriptors will be identical, mainly
          * when only a single descriptor was passed (which would set the out
          * one in the tuple), there is no need to check all loops.
+         * Note that this also allows (None, None, float64) to resolve to
+         * (float64, float64, float64), even when the inputs do not match,
+         * i.e. fixing the output part of the signature can fix all of them.
+         * This is necessary to support `nextafter(1., inf, dtype=float32)`,
+         * where it is "clear" we want to cast 1. and inf to float32.
          */
         PyArray_Descr *descr = NULL;
         if (PyTuple_CheckExact(type_tup) &&
@@ -565,7 +570,12 @@ PyUFunc_SimpleUniformOperationTypeResolver(
             for (int i = 0; i < nop; i++) {
                 PyObject *item = PyTuple_GET_ITEM(type_tup, i);
                 if (item == Py_None) {
-                    continue;
+                    if (i < ufunc->nin) {
+                        continue;
+                    }
+                    /* All outputs must be set (this could be relaxed) */
+                    descr = NULL;
+                    break;
                 }
                 if (!PyArray_DescrCheck(item)) {
                     /* Defer to default resolver (will raise an error there) */
@@ -1661,6 +1671,9 @@ ufunc_loop_matches(PyUFuncObject *self,
         if (types[i] == NPY_OBJECT && !any_object && self->ntypes > 1) {
             return 0;
         }
+        if (types[i] == NPY_NOTYPE) {
+            continue;  /* Matched by being explicitly specified. */
+        }
 
         /*
          * If type num is NPY_VOID and struct dtypes have been passed in,
@@ -1710,6 +1723,9 @@ ufunc_loop_matches(PyUFuncObject *self,
      * outputs.
      */
     for (i = nin; i < nop; ++i) {
+        if (types[i] == NPY_NOTYPE) {
+            continue;  /* Matched by being explicitly specified. */
+        }
         if (op[i] != NULL) {
             PyArray_Descr *tmp = PyArray_DescrFromType(types[i]);
             if (tmp == NULL) {
@@ -1728,7 +1744,6 @@ ufunc_loop_matches(PyUFuncObject *self,
             Py_DECREF(tmp);
         }
     }
-
     return 1;
 }
 
@@ -1869,12 +1884,15 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
                         int n_specified,
                         int *specified_types,
                         PyArrayObject **op,
+                        NPY_CASTING input_casting,
                         NPY_CASTING casting,
                         int any_object,
                         int use_min_scalar,
                         PyArray_Descr **out_dtype)
 {
     int i, j, nin = self->nin, nop = nin + self->nout;
+    assert(n_specified == nop);
+    int types[NPY_MAXARGS];
 
     /* Use this to try to avoid repeating the same userdef loop search */
     int last_userdef = -1;
@@ -1907,28 +1925,31 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
                 return -1;
             }
             for (; funcdata != NULL; funcdata = funcdata->next) {
-                int *types = funcdata->arg_types;
-                int matched = 1;
-
-                if (n_specified == nop) {
-                    for (j = 0; j < nop; ++j) {
-                        if (types[j] != specified_types[j] &&
-                                    specified_types[j] != NPY_NOTYPE) {
-                            matched = 0;
-                            break;
-                        }
+                int *orig_types = funcdata->arg_types;
+
+                /*
+                 * Copy the types into an int array for matching
+                 * (Mostly duplicated in `type_tuple_type_resolver`)
+                 */
+                for (j = 0; j < nop; ++j) {
+                    if (specified_types[j] == NPY_NOTYPE) {
+                        types[j] = orig_types[j];
+                        continue;
                     }
-                } else {
-                    if (types[nin] != specified_types[0]) {
-                        matched = 0;
+                    if (orig_types[j] != specified_types[j]) {
+                        break;
                     }
+                    /* indicate that we do not have to check this type anymore. */
+                    types[j] = NPY_NOTYPE;
                 }
-                if (!matched) {
+
+                if (j != nop) {
+                    /* no match */
                     continue;
                 }
 
                 switch (ufunc_loop_matches(self, op,
-                            casting, casting,
+                            input_casting, casting,
                             any_object, use_min_scalar,
                             types, NULL,
                             &no_castable_output, &err_src_typecode,
@@ -1936,7 +1957,19 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
                     /* It works */
                     case 1:
                         set_ufunc_loop_data_types(self, op,
-                            out_dtype, types, NULL);
+                            out_dtype, orig_types, NULL);
+                        /*
+                         * In principle, we only need to validate the
+                         * NPY_NOTYPE ones
+                         */
+                        if (PyUFunc_ValidateCasting(self,
+                                casting, op, out_dtype) < 0) {
+                            for (j = 0; j < self->nargs; j++) {
+                                Py_DECREF(out_dtype[j]);
+                                out_dtype[j] = NULL;
+                            }
+                            return -1;
+                        }
                         return 1;
                     /* Didn't match */
                     case 0:
@@ -2069,6 +2102,94 @@ linear_search_type_resolver(PyUFuncObject *self,
     return -1;
 }
 
+
+static int
+type_tuple_type_resolver_core(PyUFuncObject *self,
+        PyArrayObject **op,
+        NPY_CASTING input_casting, NPY_CASTING casting,
+        int specified_types[],
+        int any_object,
+        int no_castable_output, int use_min_scalar,
+        PyArray_Descr **out_dtype)
+{
+    int i, j;
+    int nop = self->nargs;
+    int types[NPY_MAXARGS];
+
+    /* For making a better error message on coercion error */
+    char err_dst_typecode = '-', err_src_typecode = '-';
+
+    /* If the ufunc has userloops, search for them. */
+    if (self->userloops) {
+        switch (type_tuple_userloop_type_resolver(self,
+                nop, specified_types,
+                op, input_casting, casting,
+                any_object, use_min_scalar,
+                out_dtype)) {
+            /* Error */
+            case -1:
+                return -1;
+            /* Found matching loop */
+            case 1:
+                return 0;
+        }
+    }
+
+    for (i = 0; i < self->ntypes; ++i) {
+        char *orig_types = self->types + i*self->nargs;
+
+        /*
+         * Check specified types and copy into an int array for matching
+         * (Mostly duplicated in `type_tuple_userloop_type_resolver`)
+         */
+        for (j = 0; j < nop; ++j) {
+            if (specified_types[j] == NPY_NOTYPE) {
+                types[j] = orig_types[j];
+                continue;
+            }
+            if (orig_types[j] != specified_types[j]) {
+                break;
+            }
+            /* indicate that we do not have to check this type anymore. */
+            types[j] = NPY_NOTYPE;
+        }
+        if (j < nop) {
+            /* no match */
+            continue;
+        }
+
+        switch (ufunc_loop_matches(self, op,
+                input_casting, casting,
+                any_object, use_min_scalar,
+                types, NULL,
+                &no_castable_output, &err_src_typecode,
+                &err_dst_typecode)) {
+            case -1:
+                /* Error */
+                return -1;
+            case 0:
+                /* Cannot cast inputs */
+                continue;
+            case 1:
+                /* Success, fill also the NPY_NOTYPE (cast from char to int) */
+                for (j = 0; j < nop; j++) {
+                    types[j] = orig_types[j];
+                }
+                set_ufunc_loop_data_types(self, op, out_dtype, types, NULL);
+                /* In principle, we only need to validate the NPY_NOTYPE ones */
+                if (PyUFunc_ValidateCasting(self, casting, op, out_dtype) < 0) {
+                    for (j = 0; j < self->nargs; j++) {
+                        Py_DECREF(out_dtype[j]);
+                        out_dtype[j] = NULL;
+                    }
+                    return -1;
+                }
+                return 0;
+        }
+    }
+    return -2;
+}
+
 /*
  * Does a linear search for the inner loop of the ufunc specified by type_tup.
  *
@@ -2079,18 +2200,16 @@ NPY_NO_EXPORT int
 type_tuple_type_resolver(PyUFuncObject *self,
                         PyObject *type_tup,
                         PyArrayObject **op,
+                        NPY_CASTING input_casting,
                         NPY_CASTING casting,
                         int any_object,
                         PyArray_Descr **out_dtype)
 {
-    int i, j, nin = self->nin, nop = nin + self->nout;
-    int specified_types[NPY_MAXARGS], types[NPY_MAXARGS];
+    int nin = self->nin, nop = nin + self->nout;
+    int specified_types[NPY_MAXARGS];
     const char *ufunc_name;
     int no_castable_output = 0, use_min_scalar;
 
-    /* For making a better error message on coercion error */
-    char err_dst_typecode = '-', err_src_typecode = '-';
-
     ufunc_name = ufunc_get_name_cstr(self);
 
     use_min_scalar = should_use_min_scalar(nin, op, 0, NULL);
@@ -2112,7 +2231,7 @@ type_tuple_type_resolver(PyUFuncObject *self,
             PyErr_SetString(PyExc_RuntimeError, bad_type_tup_msg);
             return -1;
         }
-        for (i = 0; i < nop; ++i) {
+        for (int i = 0; i < nop; ++i) {
             PyObject *item = PyTuple_GET_ITEM(type_tup, i);
             if (item == Py_None) {
                 specified_types[i] = NPY_NOTYPE;
@@ -2131,57 +2250,51 @@ type_tuple_type_resolver(PyUFuncObject *self,
         return -1;
     }
 
-    /* If the ufunc has userloops, search for them. */
-    if (self->userloops) {
-        switch (type_tuple_userloop_type_resolver(self,
-                        nop, specified_types,
-                        op, casting,
-                        any_object, use_min_scalar,
-                        out_dtype)) {
-            /* Error */
-            case -1:
-                return -1;
-            /* Found matching loop */
-            case 1:
-                return 0;
-        }
-    }
-
-    for (i = 0; i < self->ntypes; ++i) {
-        char *orig_types = self->types + i*self->nargs;
+    int res = type_tuple_type_resolver_core(self,
+            op, input_casting, casting, specified_types, any_object,
+            no_castable_output, use_min_scalar, out_dtype);
 
-        /* Copy the types into an int array for matching */
-        for (j = 0; j < nop; ++j) {
-            types[j] = orig_types[j];
-        }
+    if (res != -2) {
+        return res;
+    }
 
-        for (j = 0; j < nop; ++j) {
-            if (types[j] != specified_types[j] &&
-                    specified_types[j] != NPY_NOTYPE) {
+    /*
+     * When the user passes `dtype=dtype`, it gets translated to
+     * `signature=(None,)*nin + (dtype,)*nout`.  If the signature matches that
+     * exactly (could be relaxed but that is not necessary for backcompat),
+     * we also try `signature=(dtype,)*(nin+nout)`.
+     * This used to be the main meaning for `dtype=dtype`, but some calls broke
+     * the expectation, and changing it allows for `dtype=dtype` to be useful
+     * for ufuncs like `np.ldexp` in the future while also normalizing it to
+     * a `signature` early on.
+     */
+    int homogeneous_type = NPY_NOTYPE;
+    if (self->nout > 0) {
+        homogeneous_type = specified_types[nin];
+        for (int i = nin+1; i < nop; i++) {
+            if (specified_types[i] != homogeneous_type) {
+                homogeneous_type = NPY_NOTYPE;
                 break;
             }
         }
-        if (j < nop) {
-            /* no match */
-            continue;
+    }
+    if (homogeneous_type != NPY_NOTYPE) {
+        for (int i = 0; i < nin; i++) {
+            if (specified_types[i] != NPY_NOTYPE) {
+                homogeneous_type = NPY_NOTYPE;
+                break;
+            }
+            specified_types[i] = homogeneous_type;
         }
+    }
+    if (homogeneous_type != NPY_NOTYPE) {
+        /* Try again with the homogeneous specified types. */
+        res = type_tuple_type_resolver_core(self,
+                op, input_casting, casting, specified_types, any_object,
+                no_castable_output, use_min_scalar, out_dtype);
 
-        switch (ufunc_loop_matches(self, op,
-                    casting, casting,
-                    any_object, use_min_scalar,
-                    types, NULL,
-                    &no_castable_output, &err_src_typecode,
-                    &err_dst_typecode)) {
-            case -1:
-                /* Error */
-                return -1;
-            case 0:
-                /* Cannot cast inputs */
-                continue;
-            case 1:
-                /* Success */
-                set_ufunc_loop_data_types(self, op, out_dtype, types, NULL);
-                return 0;
+        if (res != -2) {
+            return res;
         }
     }
 
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index 1d6ad3358..b11c69852 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -123,6 +123,7 @@ NPY_NO_EXPORT int
 type_tuple_type_resolver(PyUFuncObject *self,
                          PyObject *type_tup,
                          PyArrayObject **op,
+                         NPY_CASTING input_casting,
                          NPY_CASTING casting,
                          int any_object,
                          PyArray_Descr **out_dtype);
diff --git a/numpy/core/tests/test_scalar_methods.py b/numpy/core/tests/test_scalar_methods.py
index 4f5fd2988..3693bba59 100644
--- a/numpy/core/tests/test_scalar_methods.py
+++ b/numpy/core/tests/test_scalar_methods.py
@@ -89,7 +89,8 @@ class TestAsIntegerRatio:
     ])
     def test_roundtrip(self, ftype, frac_vals, exp_vals):
         for frac, exp in zip(frac_vals, exp_vals):
-            f = np.ldexp(frac, exp, dtype=ftype)
+            f = np.ldexp(ftype(frac), exp)
+            assert f.dtype == ftype
             n, d = f.as_integer_ratio()
 
             try:
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 64ecb3780..a47f1df49 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -457,6 +457,34 @@ class TestUfunc:
         float_dtype = type(np.dtype(np.float64))
         np.add(3, 4, signature=(float_dtype, float_dtype, None))
 
+    @pytest.mark.parametrize("casting", ["unsafe", "same_kind", "safe"])
+    def test_partial_signature_mismatch(self, casting):
+        # If the second argument matches already, no need to specify it:
+        res = np.ldexp(np.float32(1.), np.int_(2), dtype="d")
+        assert res.dtype == "d"
+        res = np.ldexp(np.float32(1.), np.int_(2), signature=(None, None, "d"))
+        assert res.dtype == "d"
+
+        # ldexp only has a loop for long input as second argument, overriding
+        # the output cannot help with that (no matter the casting)
+        with pytest.raises(TypeError):
+            np.ldexp(1., np.uint64(3), dtype="d")
+        with pytest.raises(TypeError):
+            np.ldexp(1., np.uint64(3), signature=(None, None, "d"))
+
+    def test_use_output_signature_for_all_arguments(self):
+        # Test that providing only `dtype=` or `signature=(None, None, dtype)`
+        # is sufficient if falling back to a homogeneous signature works.
+        # In this case, the `intp, intp -> intp` loop is chosen.
+        res = np.power(1.5, 2.8, dtype=np.intp, casting="unsafe")
+        assert res == 1  # the cast happens first.
+        res = np.power(1.5, 2.8, signature=(None, None, np.intp),
+                       casting="unsafe")
+        assert res == 1
+        with pytest.raises(TypeError):
+            # the unsafe casting would normally cause errors though:
+            np.power(1.5, 2.8, dtype=np.intp)
+
     def test_signature_errors(self):
         with pytest.raises(TypeError,
                     match="the signature object to ufunc must be a string or"):