summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Julian Taylor <jtaylor.debian@googlemail.com> 2013-10-23 22:32:05 +0200
committer: Julian Taylor <jtaylor.debian@googlemail.com> 2013-10-25 18:22:21 +0200
commit: ef229342f2bc0fdbee3264cb245d6c0a4ebfc1ff (patch)
tree: 3b27be83a2ac11f95a7f389f06eb39860d0f4dc4
parent: cfd81489a61c5144c9a77bb0494877817acd24d3 (diff)
download: numpy-ef229342f2bc0fdbee3264cb245d6c0a4ebfc1ff.tar.gz
MAINT: remove trailing zero count path in npy_memchr
It's only actually faster than just running through the trailing bytewise loop if the needle is in the last byte.
-rw-r--r-- numpy/core/setup_common.py 1
-rw-r--r-- numpy/core/src/multiarray/common.h 17
2 files changed, 8 insertions, 10 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 4633aef84..bad3607fa 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -116,7 +116,6 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
("__builtin_bswap32", '5u'),
("__builtin_bswap64", '5u'),
("__builtin_expect", '5, 0'),
- ("__builtin_ctz", '5'),
("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
]
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index cc8c81936..5d77170ea 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -137,19 +137,18 @@ npy_memchr(char * haystack, char needle,
}
else {
/* usually find elements to skip path */
-#if (defined HAVE___BUILTIN_CTZ && defined NPY_CPU_HAVE_UNALIGNED_ACCESS)
+#if defined NPY_CPU_HAVE_UNALIGNED_ACCESS
if (needle == 0 && stride == 1) {
- char * const end = haystack + size;
- while (p < end - (size % sizeof(unsigned int))) {
+ /* iterate until last multiple of 4 */
+ char * block_end = haystack + size - (size % sizeof(unsigned int));
+ while (p < block_end) {
unsigned int v = *(unsigned int*)p;
- if (v == 0) {
- p += sizeof(unsigned int);
- continue;
+ if (v != 0) {
+ break;
}
- p += __builtin_ctz(v) / 8;
- *psubloopsize = (p - haystack);
- return p;
+ p += sizeof(unsigned int);
}
+ /* handle rest */
subloopsize = (p - haystack);
}
#endif