diff options
| field | value | date |
|---|---|---|
| author | Pauli Virtanen <pav@iki.fi> | 2020-08-20 20:56:18 +0300 |
| committer | Pauli Virtanen <pav@iki.fi> | 2020-08-21 22:49:17 +0300 |
| commit | 9cebb29964a3f8767b433d26aee85d969c85ede6 (patch) | |
| tree | 2b3bd39f3cfa9b11a968b60cd26d08da25b940dd | |
| parent | 4cba2d91e1546872d29af6b25ad35947f27e03ac (diff) | |
| download | numpy-9cebb29964a3f8767b433d26aee85d969c85ede6.tar.gz | |
BUG: core: fix ilp64 blas dot/vdot/... for strides > int32 max

Fix an overlooked int cast when HAVE_BLAS_ILP64 is defined: blas_stride()
returned int, but it was supposed to return CBLAS_INT.

Also add a regression test.

Move blas_stride() to npy_cblas.h.

Replace npy_is_aligned with a modulo check; we're going to call BLAS, so
there is no need to micro-optimize integer division here.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | numpy/core/src/common/npy_cblas.h | 26 |
| -rw-r--r-- | numpy/core/src/multiarray/common.h | 24 |
| -rw-r--r-- | numpy/core/tests/test_regression.py | 15 |

3 files changed, 40 insertions, 25 deletions
```diff
diff --git a/numpy/core/src/common/npy_cblas.h b/numpy/core/src/common/npy_cblas.h
index 97308238a..c0441e81e 100644
--- a/numpy/core/src/common/npy_cblas.h
+++ b/numpy/core/src/common/npy_cblas.h
@@ -59,6 +59,32 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
 #undef BLASINT
 #undef BLASNAME
 
+
+/*
+ * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done
+ * (BLAS won't handle negative or zero strides the way we want).
+ */
+static NPY_INLINE CBLAS_INT
+blas_stride(npy_intp stride, unsigned itemsize)
+{
+    /*
+     * Should probably check pointer alignment also, but this may cause
+     * problems if we require complex to be 16 byte aligned.
+     */
+    if (stride > 0 && (stride % itemsize) == 0) {
+        stride /= itemsize;
+#ifndef HAVE_BLAS_ILP64
+        if (stride <= INT_MAX) {
+#else
+        if (stride <= NPY_MAX_INT64) {
+#endif
+            return stride;
+        }
+    }
+    return 0;
+}
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 793cefaf8..e0b143604 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -293,30 +293,6 @@ npy_memchr(char * haystack, char needle,
 }
 
 /*
- * Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done
- * (BLAS won't handle negative or zero strides the way we want).
- */
-static NPY_INLINE int
-blas_stride(npy_intp stride, unsigned itemsize)
-{
-    /*
-     * Should probably check pointer alignment also, but this may cause
-     * problems if we require complex to be 16 byte aligned.
-     */
-    if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
-        stride /= itemsize;
-#ifndef HAVE_BLAS_ILP64
-        if (stride <= INT_MAX) {
-#else
-        if (stride <= NPY_MAX_INT64) {
-#endif
-            return stride;
-        }
-    }
-    return 0;
-}
-
-/*
  * Define a chunksize for CBLAS. CBLAS counts in integers.
  */
 #if NPY_MAX_INTP > INT_MAX
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index f778d4d7c..a97198076 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -14,7 +14,7 @@ from numpy.testing import (
         assert_raises_regex, assert_warns, suppress_warnings,
         _assert_valid_refcount, HAS_REFCOUNT,
         )
-from numpy.testing._private.utils import _no_tracing
+from numpy.testing._private.utils import _no_tracing, requires_memory
 from numpy.compat import asbytes, asunicode, pickle
 
 try:
@@ -2501,3 +2501,16 @@ class TestRegression:
                            formats=[np.int64, np.int64]))
         descr = np.array((1, 1), dtype=dt).__array_interface__['descr']
         assert descr == [('', '|V8')]  # instead of [(b'', '|V8')]
+
+    @pytest.mark.skipif(sys.maxsize < 2 ** 31 + 1, reason='overflows 32-bit python')
+    @requires_memory(free_bytes=9e9)
+    def test_dot_big_stride(self):
+        # gh-17111
+        # blas stride = stride//itemsize > int32 max
+        int32_max = np.iinfo(np.int32).max
+        n = int32_max + 3
+        a = np.empty([n], dtype=np.float32)
+        b = a[::n-1]
+        b[...] = 1
+        assert b.strides[0] > int32_max * b.dtype.itemsize
+        assert np.dot(b, b) == 2.0
```