diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/setup_common.py | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.h | 21 |
2 files changed, 22 insertions, 0 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index bad3607fa..4633aef84 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -116,6 +116,7 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), ("__builtin_bswap32", '5u'), ("__builtin_bswap64", '5u'), ("__builtin_expect", '5, 0'), + ("__builtin_ctz", '5'), ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 ] diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index f94bd07d5..3e060de3d 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -1,6 +1,7 @@ #ifndef _NPY_PRIVATE_COMMON_H_ #define _NPY_PRIVATE_COMMON_H_ #include <numpy/npy_common.h> +#include <numpy/npy_cpu.h> #define error_converting(x) (((x) == -1) && PyErr_Occurred()) @@ -109,11 +110,31 @@ npy_memchr(char * haystack, char needle, } if (!invert) { + /* + * this is usually the path to determine elements to process, + * performance less important here. + * memchr has large setup cost if 0 byte is close to start. + */ while (p < end && *p != needle) { p += stride; } } else { + /* usually find elements to skip path */ +#if (defined HAVE___BUILTIN_CTZ && defined NPY_CPU_HAVE_UNALIGNED_ACCESS) + if (needle == 0 && stride == 1) { + while (p < end - ((npy_uintp)end % sizeof(unsigned int))) { + unsigned int v = *(unsigned int*)p; + if (v == 0) { + p += sizeof(unsigned int); + continue; + } + p += __builtin_ctz(v) / 8; + *subloopsize = (p - haystack) / stride; + return p; + } + } +#endif while (p < end && *p == needle) { p += stride; } |