summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/core/setup_common.py 1
-rw-r--r-- numpy/core/src/multiarray/common.h 21
2 files changed, 22 insertions, 0 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index bad3607fa..4633aef84 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -116,6 +116,7 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
("__builtin_bswap32", '5u'),
("__builtin_bswap64", '5u'),
("__builtin_expect", '5, 0'),
+ ("__builtin_ctz", '5'),
("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
]
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index f94bd07d5..3e060de3d 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -1,6 +1,7 @@
#ifndef _NPY_PRIVATE_COMMON_H_
#define _NPY_PRIVATE_COMMON_H_
#include <numpy/npy_common.h>
+#include <numpy/npy_cpu.h>
#define error_converting(x) (((x) == -1) && PyErr_Occurred())
@@ -109,11 +110,31 @@ npy_memchr(char * haystack, char needle,
}
if (!invert) {
+ /*
+ * This is usually the path that determines which elements to process;
+ * performance is less important here.
+ * memchr has a large setup cost if a 0 byte is close to the start.
+ */
while (p < end && *p != needle) {
p += stride;
}
}
else {
+ /* this path usually finds the elements to skip */
+#if (defined HAVE___BUILTIN_CTZ && defined NPY_CPU_HAVE_UNALIGNED_ACCESS)
+ if (needle == 0 && stride == 1) {
+ while (p < end - ((npy_uintp)end % sizeof(unsigned int))) {
+ unsigned int v = *(unsigned int*)p;
+ if (v == 0) {
+ p += sizeof(unsigned int);
+ continue;
+ }
+ p += __builtin_ctz(v) / 8;
+ *subloopsize = (p - haystack) / stride;
+ return p;
+ }
+ }
+#endif
while (p < end && *p == needle) {
p += stride;
}