author | Sebastian Berg <sebastian@sipsolutions.net> | 2021-12-04 14:03:02 -0600 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2021-12-07 17:13:23 -0600 |
commit | 0353fef12215f4d5bb72389974905eb80f0984a8 (patch) | |
tree | c0b0f9812903a78b1be094082c8e935e8290c3c1 | |
parent | 51d99b2a2746ac1ad16c1d2db8da8197e8200367 (diff) | |
download | numpy-0353fef12215f4d5bb72389974905eb80f0984a8.tar.gz | |
PERF: Fix performance bug in dispatching cache
In promotion cases, the result of the dispatching was not cached
(at least not as well as it could be). This led to large slowdowns
in cases where promotion is necessary, for example:

    a = np.array([3]); b = np.array([3.])
    %timeit a + b
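For reference, the same comparison can be run outside IPython; this is only a rough, self-contained sketch using the standard `timeit` module (array values and repeat counts are arbitrary, and absolute numbers will vary by machine and NumPy version):

```python
import timeit
import numpy as np

a = np.array([3])    # int64 array
b = np.array([3.])   # float64 array, so a + b needs promotion

# Same-dtype addition: dispatching info is found directly in the cache.
same = timeit.timeit("a + a", globals={"a": a}, number=100_000)

# Mixed-dtype addition: before this fix, the promoted result was not
# cached, so every call repeated the full promotion/resolution work.
mixed = timeit.timeit("a + b", globals={"a": a, "b": b}, number=100_000)

print(f"same dtype:  {same:.3f}s")
print(f"mixed dtype: {mixed:.3f}s")
```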
The fix here is fairly small, almost a cleanup, since I had put the
whole `cache=True` logic in the wrong place.
(I currently never cache promoters themselves; to be fair, this could
be done, but it should be unnecessary and may require a bit of thought.
Another thing that could be optimized is caching the case where there is
no matching loop, to speed up error paths.)
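For orientation, the shape of the fix can be modeled in a few lines of Python. This is only an illustrative sketch; the `dispatch`/`resolve_via_promotion` names and the dict-based cache are hypothetical stand-ins, not NumPy API. The point is that the resolved info is now stored under the original operand dtypes, so promoted combinations also become plain cache hits:

```python
import numpy as np

# Illustrative model only: the real cache lives in C inside the ufunc
# object (ufunc->_dispatch_cache) and is keyed by DType identity.
_dispatch_cache = {}

def resolve_via_promotion(op_dtypes):
    # Stand-in for the expensive promotion/resolution step; here we just
    # compute a common dtype as a placeholder for the dispatching info.
    return np.result_type(*op_dtypes)

def dispatch(op_dtypes):
    info = _dispatch_cache.get(op_dtypes)
    if info is not None:            # fast path: previously seen combination
        return info
    info = resolve_via_promotion(op_dtypes)
    # The fix: store the result under the *original* operand dtypes, so the
    # next int64 + float64 call hits the cache instead of re-resolving.
    _dispatch_cache[op_dtypes] = info
    return info

print(dispatch((np.dtype("int64"), np.dtype("float64"))))  # resolves and caches
print(dispatch((np.dtype("int64"), np.dtype("float64"))))  # cache hit
```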
-rw-r--r-- | numpy/core/src/umath/dispatching.c | 27 |
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 4 |
2 files changed, 18 insertions, 13 deletions
```diff
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
index 664288084..4c6b09b80 100644
--- a/numpy/core/src/umath/dispatching.c
+++ b/numpy/core/src/umath/dispatching.c
@@ -62,7 +62,7 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
         PyArrayObject *const ops[],
         PyArray_DTypeMeta *signature[],
         PyArray_DTypeMeta *op_dtypes[],
-        npy_bool allow_legacy_promotion, npy_bool cache);
+        npy_bool allow_legacy_promotion);
 
 
 /**
@@ -490,10 +490,9 @@ call_promoter_and_recurse(PyUFuncObject *ufunc, PyObject *promoter,
     if (Py_EnterRecursiveCall(" during ufunc promotion.") != 0) {
         goto finish;
     }
-    /* TODO: The caching logic here may need revising: */
     resolved_info = promote_and_get_info_and_ufuncimpl(ufunc,
             operands, signature, new_op_dtypes,
-            /* no legacy promotion */ NPY_FALSE, /* cache */ NPY_TRUE);
+            /* no legacy promotion */ NPY_FALSE);
 
     Py_LeaveRecursiveCall();
 
@@ -658,7 +657,7 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
         PyArrayObject *const ops[],
         PyArray_DTypeMeta *signature[],
         PyArray_DTypeMeta *op_dtypes[],
-        npy_bool allow_legacy_promotion, npy_bool cache)
+        npy_bool allow_legacy_promotion)
 {
     /*
      * Fetch the dispatching info which consists of the implementation and
@@ -677,8 +676,8 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
     }
 
     /*
-     * If `info == NULL`, the caching failed, repeat using the full resolution
-     * in `resolve_implementation_info`.
+     * If `info == NULL`, loading from cache failed, use the full resolution
+     * in `resolve_implementation_info` (which caches its result on success).
      */
     if (info == NULL) {
         if (resolve_implementation_info(ufunc,
@@ -691,7 +690,7 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
              * Found the ArrayMethod and NOT promoter. Before returning it
              * add it to the cache for faster lookup in the future.
              */
-            if (cache && PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
+            if (PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
                     (PyObject **)op_dtypes, info, 0) < 0) {
                 return NULL;
             }
@@ -712,6 +711,11 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
             return NULL;
         }
         else if (info != NULL) {
+            /* Add result to the cache using the original types: */
+            if (PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
+                    (PyObject **)op_dtypes, info, 0) < 0) {
+                return NULL;
+            }
             return info;
         }
     }
@@ -735,7 +739,12 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
         return NULL;
     }
     info = promote_and_get_info_and_ufuncimpl(ufunc,
-            ops, signature, new_op_dtypes, NPY_FALSE, cacheable);
+            ops, signature, new_op_dtypes, NPY_FALSE);
+    /* Add this to the cache using the original types: */
+    if (cacheable && PyArrayIdentityHash_SetItem(ufunc->_dispatch_cache,
+            (PyObject **)op_dtypes, info, 0) < 0) {
+        return NULL;
+    }
     for (int i = 0; i < ufunc->nargs; i++) {
         Py_XDECREF(new_op_dtypes[i]);
     }
@@ -828,7 +837,7 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
     }
 
     PyObject *info = promote_and_get_info_and_ufuncimpl(ufunc,
-            ops, signature, op_dtypes, allow_legacy_promotion, NPY_TRUE);
+            ops, signature, op_dtypes, allow_legacy_promotion);
 
     if (info == NULL) {
         if (!PyErr_Occurred()) {
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 97dc9f00f..9107323b0 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -998,10 +998,6 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
     }
     if (*allow_legacy_promotion && (!all_scalar && any_scalar)) {
         *force_legacy_promotion = should_use_min_scalar(nin, out_op, 0, NULL);
-        /*
-         * TODO: if this is False, we end up in a "very slow" path that should
-         * be avoided. This makes `int_arr + 0.` ~40% slower.
-         */
     }
 
     /* Convert and fill in output arguments */
```