BUG: fix win32 np.clip slowness

The _NPY_CLIP macro nests _NPY_@name@_MAX inside _NPY_@name@_MIN, so its input arguments are re-evaluated multiple times. For floating-point types this means the NaN check is performed repeatedly, which shows up as slow np.clip on Win32 builds. Replace the macro with a local temporary that is clamped in two separate steps. See #18673.
author: KIU Shueng Chuan <nixchuan@gmail.com> 2021-10-19 14:24:51 +0800
committer: KIU Shueng Chuan <nixchuan@gmail.com> 2021-10-19 14:35:35 +0800
commit: 52b5935ea1ab9a5f1043e7a4af2ced8311affe01 (patch)
tree: 21b2525fc3462ec02d75dda47f06d496b4c1a0c0
parent: dd2eaaabdb0451631e95376ae2b4d319082b438e (diff)
download: numpy-52b5935ea1ab9a5f1043e7a4af2ced8311affe01.tar.gz
1 files changed, 12 insertions, 7 deletions
diff --git a/numpy/core/src/umath/clip.c.src b/numpy/core/src/umath/clip.c.src
index bc966b7ac..48786d4a2 100644
--- a/numpy/core/src/umath/clip.c.src
+++ b/numpy/core/src/umath/clip.c.src
@@ -76,9 +76,6 @@
  *         npy_datetime, npy_timedelta#
  */
 
-#define _NPY_CLIP(x, min, max) \
-    _NPY_@name@_MIN(_NPY_@name@_MAX((x), (min)), (max))
-
 NPY_NO_EXPORT void
 @name@_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
@@ -95,25 +92,33 @@ NPY_NO_EXPORT void
         /* contiguous, branch to let the compiler optimize */
         if (is1 == sizeof(@type@) && os1 == sizeof(@type@)) {
             for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
-                *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val);
+                @type@ t = *(@type@ *)ip1;
+                t = _NPY_@name@_MAX(t, min_val);
+                t = _NPY_@name@_MIN(t, max_val);
+                *(@type@ *)op1 = t;
             }
         }
         else {
             for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
-                *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val);
+                @type@ t = *(@type@ *)ip1;
+                t = _NPY_@name@_MAX(t, min_val);
+                t = _NPY_@name@_MIN(t, max_val);
+                *(@type@ *)op1 = t;
             }
         }
     }
     else {
         TERNARY_LOOP {
-            *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, *(@type@ *)ip2, *(@type@ *)ip3);
+            @type@ t = *(@type@ *)ip1;
+            t = _NPY_@name@_MAX(t, *(@type@ *)ip2);
+            t = _NPY_@name@_MIN(t, *(@type@ *)ip3);
+            *(@type@ *)op1 = t;
         }
     }
     npy_clear_floatstatus_barrier((char*)dimensions);
 }
 
 // clean up the macros we defined above
-#undef _NPY_CLIP
 #undef _NPY_@name@_MAX
 #undef _NPY_@name@_MIN
author	KIU Shueng Chuan <nixchuan@gmail.com>	2021-10-19 14:24:51 +0800
committer	KIU Shueng Chuan <nixchuan@gmail.com>	2021-10-19 14:35:35 +0800
commit	52b5935ea1ab9a5f1043e7a4af2ced8311affe01 (patch)
tree	21b2525fc3462ec02d75dda47f06d496b4c1a0c0
parent	dd2eaaabdb0451631e95376ae2b4d319082b438e (diff)
download	numpy-52b5935ea1ab9a5f1043e7a4af2ced8311affe01.tar.gz