summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Berg <sebastian@sipsolutions.net>2021-12-04 14:03:02 -0600
committerSebastian Berg <sebastian@sipsolutions.net>2021-12-07 17:13:23 -0600
commit0353fef12215f4d5bb72389974905eb80f0984a8 (patch)
treec0b0f9812903a78b1be094082c8e935e8290c3c1
parent51d99b2a2746ac1ad16c1d2db8da8197e8200367 (diff)
downloadnumpy-0353fef12215f4d5bb72389974905eb80f0984a8.tar.gz
PERF: Fix performance bug in dispatching cache
In promotion cases, the result of the dispatching was not cached (at least not as well as it could be). This led to large slowdowns in cases where promotion is necessary; one example: `a = np.array([3]); b = np.array([3.])` then `%timeit a + b`. The fix here is fairly small, and almost a cleanup, since I put the whole `cache=True` logic in the wrong place really. (I currently do not ever cache promoters; to be fair, this could be done but should be unnecessary and may require a bit of thought. Another thing that could be optimized is caching if there is no matching loop, to speed up error paths.)
-rw-r--r--numpy/core/src/umath/dispatching.c27
-rw-r--r--numpy/core/src/umath/ufunc_object.c4
2 files changed, 18 insertions, 13 deletions
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
index 664288084..4c6b09b80 100644
--- a/numpy/core/src/umath/dispatching.c
+++ b/numpy/core/src/umath/dispatching.c
@@ -62,7 +62,7 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
PyArrayObject *const ops[],
PyArray_DTypeMeta *signature[],
PyArray_DTypeMeta *op_dtypes[],
- npy_bool allow_legacy_promotion, npy_bool cache);
+ npy_bool allow_legacy_promotion);
/**
@@ -490,10 +490,9 @@ call_promoter_and_recurse(PyUFuncObject *ufunc, PyObject *promoter,
if (Py_EnterRecursiveCall(" during ufunc promotion.") != 0) {
goto finish;
}
- /* TODO: The caching logic here may need revising: */
resolved_info = promote_and_get_info_and_ufuncimpl(ufunc,
operands, signature, new_op_dtypes,
- /* no legacy promotion */ NPY_FALSE, /* cache */ NPY_TRUE);
+ /* no legacy promotion */ NPY_FALSE);
Py_LeaveRecursiveCall();
@@ -658,7 +657,7 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
PyArrayObject *const ops[],
PyArray_DTypeMeta *signature[],
PyArray_DTypeMeta *op_dtypes[],
- npy_bool allow_legacy_promotion, npy_bool cache)
+ npy_bool allow_legacy_promotion)
{
/*
* Fetch the dispatching info which consists of the implementation and
@@ -677,8 +676,8 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
}
/*
- * If `info == NULL`, the caching failed, repeat using the full resolution
- * in `resolve_implementation_info`.
+ * If `info == NULL`, loading from cache failed, use the full resolution
+ * in `resolve_implementation_info` (which caches its result on success).
*/
if (info == NULL) {
if (resolve_implementation_info(ufunc,
@@ -691,7 +690,7 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
* Found the ArrayMethod and NOT promoter. Before returning it
* add it to the cache for faster lookup in the future.
*/
- if (cache && PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
+ if (PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
(PyObject **)op_dtypes, info, 0) < 0) {
return NULL;
}
@@ -712,6 +711,11 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
return NULL;
}
else if (info != NULL) {
+ /* Add result to the cache using the original types: */
+ if (PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
+ (PyObject **)op_dtypes, info, 0) < 0) {
+ return NULL;
+ }
return info;
}
}
@@ -735,7 +739,12 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
return NULL;
}
info = promote_and_get_info_and_ufuncimpl(ufunc,
- ops, signature, new_op_dtypes, NPY_FALSE, cacheable);
+ ops, signature, new_op_dtypes, NPY_FALSE);
+ /* Add this to the cache using the original types: */
+ if (cacheable && PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
+ (PyObject **)op_dtypes, info, 0) < 0) {
+ return NULL;
+ }
for (int i = 0; i < ufunc->nargs; i++) {
        Py_XDECREF(new_op_dtypes[i]);
}
@@ -828,7 +837,7 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
}
PyObject *info = promote_and_get_info_and_ufuncimpl(ufunc,
- ops, signature, op_dtypes, allow_legacy_promotion, NPY_TRUE);
+ ops, signature, op_dtypes, allow_legacy_promotion);
if (info == NULL) {
if (!PyErr_Occurred()) {
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 97dc9f00f..9107323b0 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -998,10 +998,6 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
}
if (*allow_legacy_promotion && (!all_scalar && any_scalar)) {
*force_legacy_promotion = should_use_min_scalar(nin, out_op, 0, NULL);
- /*
- * TODO: if this is False, we end up in a "very slow" path that should
- * be avoided. This makes `int_arr + 0.` ~40% slower.
- */
}
/* Convert and fill in output arguments */