diff options
author | KIU Shueng Chuan <nixchuan@gmail.com> | 2021-10-19 14:24:51 +0800 |
---|---|---|
committer | KIU Shueng Chuan <nixchuan@gmail.com> | 2021-10-19 14:35:35 +0800 |
commit | 52b5935ea1ab9a5f1043e7a4af2ced8311affe01 (patch) | |
tree | 21b2525fc3462ec02d75dda47f06d496b4c1a0c0 | |
parent | dd2eaaabdb0451631e95376ae2b4d319082b438e (diff) | |
download | numpy-52b5935ea1ab9a5f1043e7a4af2ced8311affe01.tar.gz |
BUG: fix win32 np.clip slowness
The use of the macro _NPY_CLIP results in multiple re-evaluations of the
input arguments. Thus for floating point types, the check for NaNs is
performed multiple times.
This manifests itself as slowness on Win32 builds. See #18673.
-rw-r--r-- | numpy/core/src/umath/clip.c.src | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/numpy/core/src/umath/clip.c.src b/numpy/core/src/umath/clip.c.src index bc966b7ac..48786d4a2 100644 --- a/numpy/core/src/umath/clip.c.src +++ b/numpy/core/src/umath/clip.c.src @@ -76,9 +76,6 @@ * npy_datetime, npy_timedelta# */ -#define _NPY_CLIP(x, min, max) \ - _NPY_@name@_MIN(_NPY_@name@_MAX((x), (min)), (max)) - NPY_NO_EXPORT void @name@_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -95,25 +92,33 @@ NPY_NO_EXPORT void /* contiguous, branch to let the compiler optimize */ if (is1 == sizeof(@type@) && os1 == sizeof(@type@)) { for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) { - *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val); + @type@ t = *(@type@ *)ip1; + t = _NPY_@name@_MAX(t, min_val); + t = _NPY_@name@_MIN(t, max_val); + *(@type@ *)op1 = t; } } else { for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) { - *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val); + @type@ t = *(@type@ *)ip1; + t = _NPY_@name@_MAX(t, min_val); + t = _NPY_@name@_MIN(t, max_val); + *(@type@ *)op1 = t; } } } else { TERNARY_LOOP { - *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, *(@type@ *)ip2, *(@type@ *)ip3); + @type@ t = *(@type@ *)ip1; + t = _NPY_@name@_MAX(t, *(@type@ *)ip2); + t = _NPY_@name@_MIN(t, *(@type@ *)ip3); + *(@type@ *)op1 = t; } } npy_clear_floatstatus_barrier((char*)dimensions); } // clean up the macros we defined above -#undef _NPY_CLIP #undef _NPY_@name@_MAX #undef _NPY_@name@_MIN |