From cfd81489a61c5144c9a77bb0494877817acd24d3 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Wed, 23 Oct 2013 20:51:21 +0200
Subject: BUG: fix handling of negative strides in npy_memchr

The new code did not account for negative strides at all. Add the old loops
back, but keep the stride 1 optimization for sparse masks.
---
 numpy/core/src/multiarray/common.h | 33 +++++++++++----------------------
 numpy/core/tests/test_api.py       |  8 ++++++++
 2 files changed, 19 insertions(+), 22 deletions(-)

(limited to 'numpy')

diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 4b23b9442..cc8c81936 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -119,20 +119,10 @@ npy_is_aligned(const void * p, const npy_uintp alignment)
  */
 static NPY_INLINE char *
 npy_memchr(char * haystack, char needle,
-           npy_intp stride, npy_intp size, npy_intp * subloopsize, int invert)
+           npy_intp stride, npy_intp size, npy_intp * psubloopsize, int invert)
 {
     char * p = haystack;
-    char * const end = haystack + size;
-    if (stride == 0) {
-        if (!invert) {
-            p = (*p != needle) ? end : haystack;
-        }
-        else {
-            p = (*p == needle) ? end : haystack;
-        }
-        *subloopsize = (p - haystack);
-        return haystack;
-    }
+    npy_intp subloopsize = 0;
 
     if (!invert) {
         /*
@@ -140,7 +130,8 @@ npy_memchr(char * haystack, char needle,
          * performance less important here.
          * memchr has large setup cost if 0 byte is close to start.
          */
-        while (p < end && *p != needle) {
+        while (subloopsize < size && *p != needle) {
+            subloopsize++;
             p += stride;
         }
     }
@@ -148,6 +139,7 @@ npy_memchr(char * haystack, char needle,
         /* usually find elements to skip path */
 #if (defined HAVE___BUILTIN_CTZ && defined NPY_CPU_HAVE_UNALIGNED_ACCESS)
         if (needle == 0 && stride == 1) {
+            char * const end = haystack + size;
             while (p < end - (size % sizeof(unsigned int))) {
                 unsigned int  v = *(unsigned int*)p;
                 if (v == 0) {
@@ -155,23 +147,20 @@ npy_memchr(char * haystack, char needle,
                     continue;
                 }
                 p += __builtin_ctz(v) / 8;
-                *subloopsize = (p - haystack) / stride;
+                *psubloopsize = (p - haystack);
                 return p;
             }
+            subloopsize = (p - haystack);
         }
 #endif
-        while (p < end && *p == needle) {
+        while (subloopsize < size && *p == needle) {
+            subloopsize++;
             p += stride;
         }
     }
 
-    /* division is very expensive */
-    if (NPY_LIKELY(stride == 1)) {
-        *subloopsize = (p - haystack);
-    }
-    else {
-        *subloopsize = (p - haystack) / stride;
-    }
+    *psubloopsize = subloopsize;
+
     return p;
 }
 
diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py
index d642a2237..b5214c803 100644
--- a/numpy/core/tests/test_api.py
+++ b/numpy/core/tests/test_api.py
@@ -332,6 +332,14 @@ def test_copyto():
     assert_raises(TypeError, np.copyto, [1, 2, 3], [2, 3, 4])
 
 def test_copyto_permut():
+    # test explicit overflow case
+    pad = 500
+    l = [True] * pad + [True, True, True, True]
+    r = np.zeros(len(l)-pad)
+    d = np.ones(len(l)-pad)
+    mask = np.array(l)[pad:]
+    np.copyto(r, d, where=mask[::-1])
+
     # test all permutation of possible masks, 9 should be sufficient for
     # current 4 byte unrolled code
     power = 9
-- 
cgit v1.2.1