author    Pauli Virtanen <pav@iki.fi>  2020-08-20 20:56:18 +0300
committer Pauli Virtanen <pav@iki.fi>  2020-08-21 22:49:17 +0300
commit    9cebb29964a3f8767b433d26aee85d969c85ede6 (patch)
tree      2b3bd39f3cfa9b11a968b60cd26d08da25b940dd
parent    4cba2d91e1546872d29af6b25ad35947f27e03ac (diff)
download  numpy-9cebb29964a3f8767b433d26aee85d969c85ede6.tar.gz
BUG: core: fix ilp64 blas dot/vdot/... for strides > int32 max
Fix an overlooked int cast when HAVE_BLAS_ILP64 is defined: the stride was
supposed to be cast to CBLAS_INT, not int. Also add a regression test.

Move blas_stride() to npy_cblas.h.

Replace npy_is_aligned with a plain modulo check; we are about to call BLAS,
so there is no need to micro-optimize the integer division here.
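For context, a minimal standalone sketch of the failure mode (illustration
only, not NumPy source; the CBLAS_INT typedef below merely mimics an ILP64
build): once the element stride exceeds INT_MAX, funnelling it through a
plain int mangles the value, while a 64-bit CBLAS_INT carries it through
intact.

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t CBLAS_INT;   /* stand-in for the HAVE_BLAS_ILP64 case */

    int main(void)
    {
        /* element stride just above int32 max, as in the regression test below */
        int64_t elem_stride = (int64_t)INT_MAX + 2;

        int truncated       = (int)elem_stride;        /* the overlooked cast: value mangled */
        CBLAS_INT preserved = (CBLAS_INT)elem_stride;  /* what the fixed code passes on */

        printf("int: %d, CBLAS_INT: %lld\n", truncated, (long long)preserved);
        return 0;
    }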
-rw-r--r--  numpy/core/src/common/npy_cblas.h     26
-rw-r--r--  numpy/core/src/multiarray/common.h    24
-rw-r--r--  numpy/core/tests/test_regression.py   15
3 files changed, 40 insertions, 25 deletions
diff --git a/numpy/core/src/common/npy_cblas.h b/numpy/core/src/common/npy_cblas.h
index 97308238a..c0441e81e 100644
--- a/numpy/core/src/common/npy_cblas.h
+++ b/numpy/core/src/common/npy_cblas.h
@@ -59,6 +59,32 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
#undef BLASINT
#undef BLASNAME
+
+/*
+ * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done
+ * (BLAS won't handle negative or zero strides the way we want).
+ */
+static NPY_INLINE CBLAS_INT
+blas_stride(npy_intp stride, unsigned itemsize)
+{
+ /*
+ * Should probably check pointer alignment also, but this may cause
+ * problems if we require complex to be 16 byte aligned.
+ */
+ if (stride > 0 && (stride % itemsize) == 0) {
+ stride /= itemsize;
+#ifndef HAVE_BLAS_ILP64
+ if (stride <= INT_MAX) {
+#else
+ if (stride <= NPY_MAX_INT64) {
+#endif
+ return stride;
+ }
+ }
+ return 0;
+}
+
+
#ifdef __cplusplus
}
#endif
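As a usage note (an assumption about the callers, not code from this patch):
the helper is written so a zero return means "this stride cannot go to BLAS"
and the caller should take a non-BLAS fallback path. A self-contained sketch
of its behaviour, with the NumPy typedefs stubbed out for illustration:

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t CBLAS_INT;     /* stand-in: HAVE_BLAS_ILP64 build */
    typedef int64_t npy_intp;      /* stand-in for the real npy_intp  */
    #define NPY_MAX_INT64 INT64_MAX

    /* local copy of the helper above, outside the NumPy headers */
    static inline CBLAS_INT
    blas_stride(npy_intp stride, unsigned itemsize)
    {
        if (stride > 0 && (stride % itemsize) == 0) {
            stride /= itemsize;
            if (stride <= NPY_MAX_INT64) {
                return (CBLAS_INT)stride;
            }
        }
        return 0;
    }

    int main(void)
    {
        npy_intp big = ((npy_intp)INT_MAX + 2) * 4;        /* byte stride > int32 max elements */
        printf("%lld\n", (long long)blas_stride(big, 4));  /* large, but valid under ILP64     */
        printf("%lld\n", (long long)blas_stride(6, 4));    /* not a multiple of itemsize -> 0  */
        printf("%lld\n", (long long)blas_stride(-16, 4));  /* negative -> 0, caller falls back */
        return 0;
    }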
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 793cefaf8..e0b143604 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -293,30 +293,6 @@ npy_memchr(char * haystack, char needle,
}
/*
- * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done
- * (BLAS won't handle negative or zero strides the way we want).
- */
-static NPY_INLINE int
-blas_stride(npy_intp stride, unsigned itemsize)
-{
- /*
- * Should probably check pointer alignment also, but this may cause
- * problems if we require complex to be 16 byte aligned.
- */
- if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
- stride /= itemsize;
-#ifndef HAVE_BLAS_ILP64
- if (stride <= INT_MAX) {
-#else
- if (stride <= NPY_MAX_INT64) {
-#endif
- return stride;
- }
- }
- return 0;
-}
-
-/*
* Define a chunksize for CBLAS. CBLAS counts in integers.
*/
#if NPY_MAX_INTP > INT_MAX
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index f778d4d7c..a97198076 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -14,7 +14,7 @@ from numpy.testing import (
assert_raises_regex, assert_warns, suppress_warnings,
_assert_valid_refcount, HAS_REFCOUNT,
)
-from numpy.testing._private.utils import _no_tracing
+from numpy.testing._private.utils import _no_tracing, requires_memory
from numpy.compat import asbytes, asunicode, pickle
try:
@@ -2501,3 +2501,16 @@ class TestRegression:
formats=[np.int64, np.int64]))
descr = np.array((1, 1), dtype=dt).__array_interface__['descr']
assert descr == [('', '|V8')] # instead of [(b'', '|V8')]
+
+ @pytest.mark.skipif(sys.maxsize < 2 ** 31 + 1, reason='overflows 32-bit python')
+ @requires_memory(free_bytes=9e9)
+ def test_dot_big_stride(self):
+ # gh-17111
+ # blas stride = stride//itemsize > int32 max
+ int32_max = np.iinfo(np.int32).max
+ n = int32_max + 3
+ a = np.empty([n], dtype=np.float32)
+ b = a[::n-1]
+ b[...] = 1
+ assert b.strides[0] > int32_max * b.dtype.itemsize
+ assert np.dot(b, b) == 2.0