summary | refs | log | tree | commit | diff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/core/setup_common.py | 1
-rw-r--r-- numpy/core/src/multiarray/common.h | 46
-rw-r--r-- numpy/core/tests/test_api.py | 8
3 files changed, 25 insertions, 30 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 4633aef84..bad3607fa 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -116,7 +116,6 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
("__builtin_bswap32", '5u'),
("__builtin_bswap64", '5u'),
("__builtin_expect", '5, 0'),
- ("__builtin_ctz", '5'),
("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
]
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 4b23b9442..5d77170ea 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -119,20 +119,10 @@ npy_is_aligned(const void * p, const npy_uintp alignment)
*/
static NPY_INLINE char *
npy_memchr(char * haystack, char needle,
- npy_intp stride, npy_intp size, npy_intp * subloopsize, int invert)
+ npy_intp stride, npy_intp size, npy_intp * psubloopsize, int invert)
{
char * p = haystack;
- char * const end = haystack + size;
- if (stride == 0) {
- if (!invert) {
- p = (*p != needle) ? end : haystack;
- }
- else {
- p = (*p == needle) ? end : haystack;
- }
- *subloopsize = (p - haystack);
- return haystack;
- }
+ npy_intp subloopsize = 0;
if (!invert) {
/*
@@ -140,38 +130,36 @@ npy_memchr(char * haystack, char needle,
* performance less important here.
* memchr has large setup cost if 0 byte is close to start.
*/
- while (p < end && *p != needle) {
+ while (subloopsize < size && *p != needle) {
+ subloopsize++;
p += stride;
}
}
else {
/* usually find elements to skip path */
-#if (defined HAVE___BUILTIN_CTZ && defined NPY_CPU_HAVE_UNALIGNED_ACCESS)
+#if defined NPY_CPU_HAVE_UNALIGNED_ACCESS
if (needle == 0 && stride == 1) {
- while (p < end - (size % sizeof(unsigned int))) {
+ /* iterate until last multiple of 4 */
+ char * block_end = haystack + size - (size % sizeof(unsigned int));
+ while (p < block_end) {
unsigned int v = *(unsigned int*)p;
- if (v == 0) {
- p += sizeof(unsigned int);
- continue;
+ if (v != 0) {
+ break;
}
- p += __builtin_ctz(v) / 8;
- *subloopsize = (p - haystack) / stride;
- return p;
+ p += sizeof(unsigned int);
}
+ /* handle rest */
+ subloopsize = (p - haystack);
}
#endif
- while (p < end && *p == needle) {
+ while (subloopsize < size && *p == needle) {
+ subloopsize++;
p += stride;
}
}
- /* division is very expensive */
- if (NPY_LIKELY(stride == 1)) {
- *subloopsize = (p - haystack);
- }
- else {
- *subloopsize = (p - haystack) / stride;
- }
+ *psubloopsize = subloopsize;
+
return p;
}
diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py
index c4c9ad1b3..a1a3f896c 100644
--- a/numpy/core/tests/test_api.py
+++ b/numpy/core/tests/test_api.py
@@ -332,6 +332,14 @@ def test_copyto():
assert_raises(TypeError, np.copyto, [1, 2, 3], [2, 3, 4])
def test_copyto_permut():
+ # test explicit overflow case
+ pad = 500
+ l = [True] * pad + [True, True, True, True]
+ r = np.zeros(len(l)-pad)
+ d = np.ones(len(l)-pad)
+ mask = np.array(l)[pad:]
+ np.copyto(r, d, where=mask[::-1])
+
# test all permutation of possible masks, 9 should be sufficient for
# current 4 byte unrolled code
power = 9