summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/upcoming_changes/21130.performance.rst4
-rw-r--r--numpy/core/src/multiarray/item_selection.c16
2 files changed, 15 insertions, 5 deletions
diff --git a/doc/release/upcoming_changes/21130.performance.rst b/doc/release/upcoming_changes/21130.performance.rst
new file mode 100644
index 000000000..70ea7dca4
--- /dev/null
+++ b/doc/release/upcoming_changes/21130.performance.rst
@@ -0,0 +1,4 @@
+Faster ``np.where``
+-------------------
+`numpy.where` is now much faster than before on unpredictable/random
+input data.
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 086b674c8..1074eb2bd 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -2641,13 +2641,19 @@ PyArray_Nonzero(PyArrayObject *self)
*multi_index++ = j++;
}
}
+ /*
+ * Fall back to a branchless strategy to avoid branch misprediction
+ * stalls, which are very expensive on most modern processors.
+ */
else {
- npy_intp j;
- for (j = 0; j < count; ++j) {
- if (*data != 0) {
- *multi_index++ = j;
- }
+ npy_intp * multi_index_end = multi_index + nonzero_count;
+ npy_intp j = 0;
+
+ while (multi_index < multi_index_end) {
+ *multi_index = j;
+ multi_index += *data != 0;
data += stride;
+ ++j;
}
}
}