diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-10-23 22:32:05 +0200 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-10-25 18:22:21 +0200 |
commit | ef229342f2bc0fdbee3264cb245d6c0a4ebfc1ff (patch) | |
tree | 3b27be83a2ac11f95a7f389f06eb39860d0f4dc4 | |
parent | cfd81489a61c5144c9a77bb0494877817acd24d3 (diff) | |
download | numpy-ef229342f2bc0fdbee3264cb245d6c0a4ebfc1ff.tar.gz |
MAINT: remove trailing zero count path in npy_memchr
It's only actually faster than just running through the trailing bytewise
loop if the needle is in the last byte.
-rw-r--r-- | numpy/core/setup_common.py | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.h | 17 |
2 files changed, 8 insertions, 10 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 4633aef84..bad3607fa 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -116,7 +116,6 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), ("__builtin_bswap32", '5u'), ("__builtin_bswap64", '5u'), ("__builtin_expect", '5, 0'), - ("__builtin_ctz", '5'), ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 ] diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index cc8c81936..5d77170ea 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -137,19 +137,18 @@ npy_memchr(char * haystack, char needle, } else { /* usually find elements to skip path */ -#if (defined HAVE___BUILTIN_CTZ && defined NPY_CPU_HAVE_UNALIGNED_ACCESS) +#if defined NPY_CPU_HAVE_UNALIGNED_ACCESS if (needle == 0 && stride == 1) { - char * const end = haystack + size; - while (p < end - (size % sizeof(unsigned int))) { + /* iterate until last multiple of 4 */ + char * block_end = haystack + size - (size % sizeof(unsigned int)); + while (p < block_end) { unsigned int v = *(unsigned int*)p; - if (v == 0) { - p += sizeof(unsigned int); - continue; + if (v != 0) { + break; } - p += __builtin_ctz(v) / 8; - *psubloopsize = (p - haystack); - return p; + p += sizeof(unsigned int); } + /* handle rest */ subloopsize = (p - haystack); } #endif |