summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2021-05-14 13:11:55 +0300
committerGitHub <noreply@github.com>2021-05-14 13:11:55 +0300
commit5eb5802e5ca2b1328a272d24872a9424c325e0a5 (patch)
treed02dad6027acd132e1d28fb7990f19435f7df57f
parentf1eeb833a32450d1c891ab66186125eca9b88ea2 (diff)
parentc2f2fa0849836d1b2e16686b5833c650a1574e8b (diff)
downloadnumpy-5eb5802e5ca2b1328a272d24872a9424c325e0a5.tar.gz
Merge pull request #18880 from seberg/make-signature-resolve-more-strict-and-fix-nextafter
API: Ensure that casting does not affect ufunc loop
-rw-r--r--doc/release/upcoming_changes/18880.compatibility.rst34
-rw-r--r--doc/source/reference/ufuncs.rst45
-rw-r--r--numpy/core/src/umath/ufunc_object.c9
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.c251
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.h1
-rw-r--r--numpy/core/tests/test_scalar_methods.py3
-rw-r--r--numpy/core/tests/test_ufunc.py28
7 files changed, 283 insertions, 88 deletions
diff --git a/doc/release/upcoming_changes/18880.compatibility.rst b/doc/release/upcoming_changes/18880.compatibility.rst
new file mode 100644
index 000000000..4951463cf
--- /dev/null
+++ b/doc/release/upcoming_changes/18880.compatibility.rst
@@ -0,0 +1,34 @@
+Ufunc ``signature=...`` and ``dtype=`` generalization and ``casting``
+---------------------------------------------------------------------
+The behaviour for ``np.ufunc(1.0, 1.0, signature=...)`` or
+``np.ufunc(1.0, 1.0, dtype=...)`` can now yield different loops in 1.21
+compared to 1.20 because of changes in promotion.
+When ``signature`` was previously used, the casting check on inputs
+was relaxed, which could lead to downcasting inputs unsafely, especially
+if combined with ``casting="unsafe"``.
+
+Casting is now guaranteed to be safe. If a signature is only
+partially provided, for example using ``signature=("float64", None, None)``,
+this could lead to no loop being found (an error).
+In that case, it is necessary to provide the complete signature
+to enforce casting the inputs.
+If ``dtype="float64"`` is used or only outputs are set (e.g.
+``signature=(None, None, "float64")``) the behaviour is unchanged.
+We expect that very few users are affected by this change.
+
+Further, the meaning of ``dtype="float64"`` has been slightly modified and
+now strictly enforces only the correct output (and not input) DTypes.
+This means it is now always equivalent to::
+
+ signature=(None, None, "float64")
+
+(If the ufunc has two inputs and one output). Since this could lead
+to no loop being found in some cases, NumPy will normally also search
+for the loop::
+
+ signature=("float64", "float64", "float64")
+
+if the first search failed.
+In the future, this behaviour may be customized to achieve the expected
+results for more complex ufuncs. (For some universal functions such as
+``np.ldexp`` inputs can have different DTypes.)
diff --git a/doc/source/reference/ufuncs.rst b/doc/source/reference/ufuncs.rst
index c919ec9b8..27ebf8d1b 100644
--- a/doc/source/reference/ufuncs.rst
+++ b/doc/source/reference/ufuncs.rst
@@ -430,8 +430,10 @@ advanced usage and will not typically be used.
.. versionadded:: 1.6
- Overrides the dtype of the calculation and output arrays. Similar to
- *signature*.
+ Overrides the DType of the output arrays the same way as the *signature*.
+ This should ensure a matching precision of the calculation. The exact
+ calculation DTypes chosen may depend on the ufunc and the inputs may be
+ cast to this DType to perform the calculation.
*subok*
@@ -442,20 +444,31 @@ advanced usage and will not typically be used.
*signature*
- Either a data-type, a tuple of data-types, or a special signature
- string indicating the input and output types of a ufunc. This argument
- allows you to provide a specific signature for the 1-d loop to use
- in the underlying calculation. If the loop specified does not exist
- for the ufunc, then a TypeError is raised. Normally, a suitable loop is
- found automatically by comparing the input types with what is
- available and searching for a loop with data-types to which all inputs
- can be cast safely. This keyword argument lets you bypass that
- search and choose a particular loop. A list of available signatures is
- provided by the **types** attribute of the ufunc object. For backwards
- compatibility this argument can also be provided as *sig*, although
- the long form is preferred. Note that this should not be confused with
- the generalized ufunc :ref:`signature <details-of-signature>` that is
- stored in the **signature** attribute of the of the ufunc object.
+ Either a DType, a tuple of DTypes, or a special signature string
+ indicating the input and output types of a ufunc.
+
+ This argument allows the user to specify exact DTypes to be used for the
+ calculation. Casting will be used as necessary. The actual DType of the
+ input arrays is not considered unless ``signature`` is ``None`` for
+ that array.
+
+ When all DTypes are fixed, a specific loop is chosen or an error raised
+ if no matching loop exists.
+ If some DTypes are not specified and left ``None``, the behaviour may
+ depend on the ufunc.
+ At this time, a list of available signatures is provided by the **types**
+ attribute of the ufunc. (This list may be missing DTypes not defined
+ by NumPy.)
+
+ The ``signature`` only specifies the DType class/type. For example, it
+ can specify that the operation should be a ``datetime64`` or ``float64``
+ operation. It does not specify the ``datetime64`` time-unit or the
+ ``float64`` byte-order.
+
+ For backwards compatibility this argument can also be provided as *sig*,
+ although the long form is preferred. Note that this should not be
+ confused with the generalized ufunc :ref:`signature <details-of-signature>`
+ that is stored in the **signature** attribute of the ufunc object.
*extobj*
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 7dffb482f..0644a28c0 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -4542,10 +4542,15 @@ _get_normalized_typetup(PyUFuncObject *ufunc,
"Cannot provide `dtype` when a ufunc has no outputs");
return -1;
}
- signature[nin] = _get_dtype(dtype_obj);
- if (signature[nin] == NULL) {
+ PyArray_DTypeMeta *dtype = _get_dtype(dtype_obj);
+ if (dtype == NULL) {
return -1;
}
+ for (int i = nin; i < nop; i++) {
+ Py_INCREF(dtype);
+ signature[i] = dtype;
+ }
+ Py_DECREF(dtype);
res = _make_new_typetup(nop, signature, out_typetup);
goto finish;
}
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index a3f97a8f3..2834235e4 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -288,7 +288,7 @@ PyUFunc_DefaultTypeResolver(PyUFuncObject *ufunc,
} else {
/* Find the specified ufunc inner loop, and fill in the dtypes */
retval = type_tuple_type_resolver(ufunc, type_tup,
- operands, casting, any_object, out_dtypes);
+ operands, input_casting, casting, any_object, out_dtypes);
}
return retval;
@@ -558,6 +558,11 @@ PyUFunc_SimpleUniformOperationTypeResolver(
* This is a fast-path, since all descriptors will be identical, mainly
* when only a single descriptor was passed (which would set the out
* one in the tuple), there is no need to check all loops.
+ * Note that this also allows (None, None, float64) to resolve to
+ * (float64, float64, float64), even when the inputs do not match,
+ * i.e. fixing the output part of the signature can fix all of them.
+ * This is necessary to support `nextafter(1., inf, dtype=float32)`,
+ * where it is "clear" we want to cast 1. and inf to float32.
*/
PyArray_Descr *descr = NULL;
if (PyTuple_CheckExact(type_tup) &&
@@ -565,7 +570,12 @@ PyUFunc_SimpleUniformOperationTypeResolver(
for (int i = 0; i < nop; i++) {
PyObject *item = PyTuple_GET_ITEM(type_tup, i);
if (item == Py_None) {
- continue;
+ if (i < ufunc->nin) {
+ continue;
+ }
+ /* All outputs must be set (this could be relaxed) */
+ descr = NULL;
+ break;
}
if (!PyArray_DescrCheck(item)) {
/* Defer to default resolver (will raise an error there) */
@@ -1661,6 +1671,9 @@ ufunc_loop_matches(PyUFuncObject *self,
if (types[i] == NPY_OBJECT && !any_object && self->ntypes > 1) {
return 0;
}
+ if (types[i] == NPY_NOTYPE) {
+ continue; /* Matched by being explicitly specified. */
+ }
/*
* If type num is NPY_VOID and struct dtypes have been passed in,
@@ -1710,6 +1723,9 @@ ufunc_loop_matches(PyUFuncObject *self,
* outputs.
*/
for (i = nin; i < nop; ++i) {
+ if (types[i] == NPY_NOTYPE) {
+ continue; /* Matched by being explicitly specified. */
+ }
if (op[i] != NULL) {
PyArray_Descr *tmp = PyArray_DescrFromType(types[i]);
if (tmp == NULL) {
@@ -1728,7 +1744,6 @@ ufunc_loop_matches(PyUFuncObject *self,
Py_DECREF(tmp);
}
}
-
return 1;
}
@@ -1869,12 +1884,15 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
int n_specified,
int *specified_types,
PyArrayObject **op,
+ NPY_CASTING input_casting,
NPY_CASTING casting,
int any_object,
int use_min_scalar,
PyArray_Descr **out_dtype)
{
int i, j, nin = self->nin, nop = nin + self->nout;
+ assert(n_specified == nop);
+ int types[NPY_MAXARGS];
/* Use this to try to avoid repeating the same userdef loop search */
int last_userdef = -1;
@@ -1907,28 +1925,31 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
return -1;
}
for (; funcdata != NULL; funcdata = funcdata->next) {
- int *types = funcdata->arg_types;
- int matched = 1;
-
- if (n_specified == nop) {
- for (j = 0; j < nop; ++j) {
- if (types[j] != specified_types[j] &&
- specified_types[j] != NPY_NOTYPE) {
- matched = 0;
- break;
- }
+ int *orig_types = funcdata->arg_types;
+
+ /*
+ * Copy the types into an int array for matching
+ * (Mostly duplicated in `type_tuple_type_resolver`)
+ */
+ for (j = 0; j < nop; ++j) {
+ if (specified_types[j] == NPY_NOTYPE) {
+ types[j] = orig_types[j];
+ continue;
}
- } else {
- if (types[nin] != specified_types[0]) {
- matched = 0;
+ if (orig_types[j] != specified_types[j]) {
+ break;
}
+ /* indicate that we do not have to check this type anymore. */
+ types[j] = NPY_NOTYPE;
}
- if (!matched) {
+
+ if (j != nop) {
+ /* no match */
continue;
}
switch (ufunc_loop_matches(self, op,
- casting, casting,
+ input_casting, casting,
any_object, use_min_scalar,
types, NULL,
&no_castable_output, &err_src_typecode,
@@ -1936,7 +1957,19 @@ type_tuple_userloop_type_resolver(PyUFuncObject *self,
/* It works */
case 1:
set_ufunc_loop_data_types(self, op,
- out_dtype, types, NULL);
+ out_dtype, orig_types, NULL);
+ /*
+ * In principle, we only need to validate the
+ * NPY_NOTYPE ones
+ */
+ if (PyUFunc_ValidateCasting(self,
+ casting, op, out_dtype) < 0) {
+ for (j = 0; j < self->nargs; j++) {
+ Py_DECREF(out_dtype[j]);
+ out_dtype[j] = NULL;
+ }
+ return -1;
+ }
return 1;
/* Didn't match */
case 0:
@@ -2069,6 +2102,94 @@ linear_search_type_resolver(PyUFuncObject *self,
return -1;
}
+
+static int
+type_tuple_type_resolver_core(PyUFuncObject *self,
+ PyArrayObject **op,
+ NPY_CASTING input_casting, NPY_CASTING casting,
+ int specified_types[],
+ int any_object,
+ int no_castable_output, int use_min_scalar,
+ PyArray_Descr **out_dtype)
+{
+ int i, j;
+ int nop = self->nargs;
+ int types[NPY_MAXARGS];
+
+ /* For making a better error message on coercion error */
+ char err_dst_typecode = '-', err_src_typecode = '-';
+
+ /* If the ufunc has userloops, search for them. */
+ if (self->userloops) {
+ switch (type_tuple_userloop_type_resolver(self,
+ nop, specified_types,
+ op, input_casting, casting,
+ any_object, use_min_scalar,
+ out_dtype)) {
+ /* Error */
+ case -1:
+ return -1;
+ /* Found matching loop */
+ case 1:
+ return 0;
+ }
+ }
+
+ for (i = 0; i < self->ntypes; ++i) {
+ char *orig_types = self->types + i*self->nargs;
+
+ /*
+ * Check specified types and copy into an int array for matching
+ * (Mostly duplicated in `type_tuple_userloop_type_resolver`)
+ */
+ for (j = 0; j < nop; ++j) {
+ if (specified_types[j] == NPY_NOTYPE) {
+ types[j] = orig_types[j];
+ continue;
+ }
+ if (orig_types[j] != specified_types[j]) {
+ break;
+ }
+ /* indicate that we do not have to check this type anymore. */
+ types[j] = NPY_NOTYPE;
+ }
+ if (j < nop) {
+ /* no match */
+ continue;
+ }
+
+ switch (ufunc_loop_matches(self, op,
+ input_casting, casting,
+ any_object, use_min_scalar,
+ types, NULL,
+ &no_castable_output, &err_src_typecode,
+ &err_dst_typecode)) {
+ case -1:
+ /* Error */
+ return -1;
+ case 0:
+ /* Cannot cast inputs */
+ continue;
+ case 1:
+ /* Success, fill also the NPY_NOTYPE (cast from char to int) */
+ for (j = 0; j < nop; j++) {
+ types[j] = orig_types[j];
+ }
+ set_ufunc_loop_data_types(self, op, out_dtype, types, NULL);
+ /* In principle, we only need to validate the NPY_NOTYPE ones */
+ if (PyUFunc_ValidateCasting(self, casting, op, out_dtype) < 0) {
+ for (j = 0; j < self->nargs; j++) {
+ Py_DECREF(out_dtype[j]);
+ out_dtype[j] = NULL;
+ }
+ return -1;
+ }
+ return 0;
+ }
+ }
+ return -2;
+}
+
/*
* Does a linear search for the inner loop of the ufunc specified by type_tup.
*
@@ -2079,18 +2200,16 @@ NPY_NO_EXPORT int
type_tuple_type_resolver(PyUFuncObject *self,
PyObject *type_tup,
PyArrayObject **op,
+ NPY_CASTING input_casting,
NPY_CASTING casting,
int any_object,
PyArray_Descr **out_dtype)
{
- int i, j, nin = self->nin, nop = nin + self->nout;
- int specified_types[NPY_MAXARGS], types[NPY_MAXARGS];
+ int nin = self->nin, nop = nin + self->nout;
+ int specified_types[NPY_MAXARGS];
const char *ufunc_name;
int no_castable_output = 0, use_min_scalar;
- /* For making a better error message on coercion error */
- char err_dst_typecode = '-', err_src_typecode = '-';
-
ufunc_name = ufunc_get_name_cstr(self);
use_min_scalar = should_use_min_scalar(nin, op, 0, NULL);
@@ -2112,7 +2231,7 @@ type_tuple_type_resolver(PyUFuncObject *self,
PyErr_SetString(PyExc_RuntimeError, bad_type_tup_msg);
return -1;
}
- for (i = 0; i < nop; ++i) {
+ for (int i = 0; i < nop; ++i) {
PyObject *item = PyTuple_GET_ITEM(type_tup, i);
if (item == Py_None) {
specified_types[i] = NPY_NOTYPE;
@@ -2131,57 +2250,51 @@ type_tuple_type_resolver(PyUFuncObject *self,
return -1;
}
- /* If the ufunc has userloops, search for them. */
- if (self->userloops) {
- switch (type_tuple_userloop_type_resolver(self,
- nop, specified_types,
- op, casting,
- any_object, use_min_scalar,
- out_dtype)) {
- /* Error */
- case -1:
- return -1;
- /* Found matching loop */
- case 1:
- return 0;
- }
- }
-
- for (i = 0; i < self->ntypes; ++i) {
- char *orig_types = self->types + i*self->nargs;
+ int res = type_tuple_type_resolver_core(self,
+ op, input_casting, casting, specified_types, any_object,
+ no_castable_output, use_min_scalar, out_dtype);
- /* Copy the types into an int array for matching */
- for (j = 0; j < nop; ++j) {
- types[j] = orig_types[j];
- }
+ if (res != -2) {
+ return res;
+ }
- for (j = 0; j < nop; ++j) {
- if (types[j] != specified_types[j] &&
- specified_types[j] != NPY_NOTYPE) {
+ /*
+ * When the user passes `dtype=dtype`, it gets translated to
+ * `signature=(None,)*nin + (dtype,)*nout`. If the signature matches that
+ * exactly (could be relaxed but that is not necessary for backcompat),
+ * we also try `signature=(dtype,)*(nin+nout)`.
+ * This used to be the main meaning for `dtype=dtype`, but some calls broke
+ * the expectation, and changing it allows for `dtype=dtype` to be useful
+ * for ufuncs like `np.ldexp` in the future while also normalizing it to
+ * a `signature` early on.
+ */
+ int homogeneous_type = NPY_NOTYPE;
+ if (self->nout > 0) {
+ homogeneous_type = specified_types[nin];
+ for (int i = nin+1; i < nop; i++) {
+ if (specified_types[i] != homogeneous_type) {
+ homogeneous_type = NPY_NOTYPE;
break;
}
}
- if (j < nop) {
- /* no match */
- continue;
+ }
+ if (homogeneous_type != NPY_NOTYPE) {
+ for (int i = 0; i < nin; i++) {
+ if (specified_types[i] != NPY_NOTYPE) {
+ homogeneous_type = NPY_NOTYPE;
+ break;
+ }
+ specified_types[i] = homogeneous_type;
}
+ }
+ if (homogeneous_type != NPY_NOTYPE) {
+ /* Try again with the homogeneous specified types. */
+ res = type_tuple_type_resolver_core(self,
+ op, input_casting, casting, specified_types, any_object,
+ no_castable_output, use_min_scalar, out_dtype);
- switch (ufunc_loop_matches(self, op,
- casting, casting,
- any_object, use_min_scalar,
- types, NULL,
- &no_castable_output, &err_src_typecode,
- &err_dst_typecode)) {
- case -1:
- /* Error */
- return -1;
- case 0:
- /* Cannot cast inputs */
- continue;
- case 1:
- /* Success */
- set_ufunc_loop_data_types(self, op, out_dtype, types, NULL);
- return 0;
+ if (res != -2) {
+ return res;
}
}
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index 1d6ad3358..b11c69852 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -123,6 +123,7 @@ NPY_NO_EXPORT int
type_tuple_type_resolver(PyUFuncObject *self,
PyObject *type_tup,
PyArrayObject **op,
+ NPY_CASTING input_casting,
NPY_CASTING casting,
int any_object,
PyArray_Descr **out_dtype);
diff --git a/numpy/core/tests/test_scalar_methods.py b/numpy/core/tests/test_scalar_methods.py
index 4f5fd2988..3693bba59 100644
--- a/numpy/core/tests/test_scalar_methods.py
+++ b/numpy/core/tests/test_scalar_methods.py
@@ -89,7 +89,8 @@ class TestAsIntegerRatio:
])
def test_roundtrip(self, ftype, frac_vals, exp_vals):
for frac, exp in zip(frac_vals, exp_vals):
- f = np.ldexp(frac, exp, dtype=ftype)
+ f = np.ldexp(ftype(frac), exp)
+ assert f.dtype == ftype
n, d = f.as_integer_ratio()
try:
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 64ecb3780..a47f1df49 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -457,6 +457,34 @@ class TestUfunc:
float_dtype = type(np.dtype(np.float64))
np.add(3, 4, signature=(float_dtype, float_dtype, None))
+ @pytest.mark.parametrize("casting", ["unsafe", "same_kind", "safe"])
+ def test_partial_signature_mismatch(self, casting):
+ # If the second argument matches already, no need to specify it:
+ res = np.ldexp(np.float32(1.), np.int_(2), dtype="d")
+ assert res.dtype == "d"
+ res = np.ldexp(np.float32(1.), np.int_(2), signature=(None, None, "d"))
+ assert res.dtype == "d"
+
+ # ldexp only has a loop for long input as second argument, overriding
+ # the output cannot help with that (no matter the casting)
+ with pytest.raises(TypeError):
+ np.ldexp(1., np.uint64(3), dtype="d")
+ with pytest.raises(TypeError):
+ np.ldexp(1., np.uint64(3), signature=(None, None, "d"))
+
+ def test_use_output_signature_for_all_arguments(self):
+ # Test that providing only `dtype=` or `signature=(None, None, dtype)`
+ # is sufficient if falling back to a homogeneous signature works.
+ # In this case, the `intp, intp -> intp` loop is chosen.
+ res = np.power(1.5, 2.8, dtype=np.intp, casting="unsafe")
+ assert res == 1 # the cast happens first.
+ res = np.power(1.5, 2.8, signature=(None, None, np.intp),
+ casting="unsafe")
+ assert res == 1
+ with pytest.raises(TypeError):
+ # the unsafe casting would normally cause errors though:
+ np.power(1.5, 2.8, dtype=np.intp)
+
def test_signature_errors(self):
with pytest.raises(TypeError,
match="the signature object to ufunc must be a string or"):