diff options
| -rw-r--r-- | doc/release/upcoming_changes/21130.performance.rst | 4 | ||||
| -rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 16 |
2 files changed, 15 insertions, 5 deletions
diff --git a/doc/release/upcoming_changes/21130.performance.rst b/doc/release/upcoming_changes/21130.performance.rst new file mode 100644 index 000000000..70ea7dca4 --- /dev/null +++ b/doc/release/upcoming_changes/21130.performance.rst @@ -0,0 +1,4 @@ +Faster ``np.where`` +------------------- +`numpy.where` is now much faster than previously on unpredictable/random +input data. diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 086b674c8..1074eb2bd 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -2641,13 +2641,19 @@ PyArray_Nonzero(PyArrayObject *self) *multi_index++ = j++; } } + /* + * Fallback to a branchless strategy to avoid branch misprediction + * stalls that are very expensive on most modern processors. + */ else { - npy_intp j; - for (j = 0; j < count; ++j) { - if (*data != 0) { - *multi_index++ = j; - } + npy_intp * multi_index_end = multi_index + nonzero_count; + npy_intp j = 0; + + while (multi_index < multi_index_end) { + *multi_index = j; + multi_index += *data != 0; data += stride; + ++j; } } } |
