summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPauli Virtanen <pav@iki.fi>2019-12-09 20:47:28 +0200
committerPauli Virtanen <pav@iki.fi>2019-12-14 14:14:37 +0200
commitde8a10dc543f69e2111d250c79f1414b70a08686 (patch)
tree1854e376357d28fa05c365556b8575f36b799c54
parentb7f42ea0616812518e04e9e25d89476145c7552c (diff)
downloadnumpy-de8a10dc543f69e2111d250c79f1414b70a08686.tar.gz
BUG: core: use blas_ilp64 also for *_matmul, *_dot, and *_vdot
Changing these to support ILP64 blas was missed in gh-15012
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src16
-rw-r--r--numpy/core/src/multiarray/common.h10
-rw-r--r--numpy/core/src/multiarray/vdot.c16
-rw-r--r--numpy/core/src/umath/matmul.c.src49
4 files changed, 53 insertions, 38 deletions
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index e36b95c00..9e108e3e1 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -3535,17 +3535,17 @@ NPY_NO_EXPORT void
npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
- int is1b = blas_stride(is1, sizeof(@type@));
- int is2b = blas_stride(is2, sizeof(@type@));
+ CBLAS_INT is1b = blas_stride(is1, sizeof(@type@));
+ CBLAS_INT is2b = blas_stride(is2, sizeof(@type@));
if (is1b && is2b)
{
double sum = 0.; /* double for stability */
while (n > 0) {
- int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+ CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
- sum += cblas_@prefix@dot(chunk,
+ sum += CBLAS_FUNC(cblas_@prefix@dot)(chunk,
(@type@ *) ip1, is1b,
(@type@ *) ip2, is2b);
/* use char strides here */
@@ -3584,17 +3584,17 @@ NPY_NO_EXPORT void
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
- int is1b = blas_stride(is1, sizeof(@ctype@));
- int is2b = blas_stride(is2, sizeof(@ctype@));
+ CBLAS_INT is1b = blas_stride(is1, sizeof(@ctype@));
+ CBLAS_INT is2b = blas_stride(is2, sizeof(@ctype@));
if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */
while (n > 0) {
- int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+ CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
@type@ tmp[2];
- cblas_@prefix@dotu_sub((int)n, ip1, is1b, ip2, is2b, tmp);
+ CBLAS_FUNC(cblas_@prefix@dotu_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 487d530a1..7eee9ddc5 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -303,7 +303,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
*/
if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
stride /= itemsize;
+#ifndef HAVE_BLAS_ILP64
if (stride <= INT_MAX) {
+#else
+ if (stride <= NPY_MAX_INT64) {
+#endif
return stride;
}
}
@@ -314,7 +318,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
* Define a chunksize for CBLAS. CBLAS counts in integers.
*/
#if NPY_MAX_INTP > INT_MAX
-# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
+# ifndef HAVE_BLAS_ILP64
+# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
+# else
+# define NPY_CBLAS_CHUNK (NPY_MAX_INT64 / 2 + 1)
+# endif
#else
# define NPY_CBLAS_CHUNK NPY_MAX_INTP
#endif
diff --git a/numpy/core/src/multiarray/vdot.c b/numpy/core/src/multiarray/vdot.c
index 424a21710..9b5d19522 100644
--- a/numpy/core/src/multiarray/vdot.c
+++ b/numpy/core/src/multiarray/vdot.c
@@ -15,17 +15,17 @@ CFLOAT_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
- int is1b = blas_stride(is1, sizeof(npy_cfloat));
- int is2b = blas_stride(is2, sizeof(npy_cfloat));
+ CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cfloat));
+ CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cfloat));
if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */
while (n > 0) {
- int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+ CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
float tmp[2];
- cblas_cdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
+ CBLAS_FUNC(cblas_cdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
@@ -66,17 +66,17 @@ CDOUBLE_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
- int is1b = blas_stride(is1, sizeof(npy_cdouble));
- int is2b = blas_stride(is2, sizeof(npy_cdouble));
+ CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cdouble));
+ CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cdouble));
if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */
while (n > 0) {
- int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
+ CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
double tmp[2];
- cblas_zdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
+ CBLAS_FUNC(cblas_zdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
diff --git a/numpy/core/src/umath/matmul.c.src b/numpy/core/src/umath/matmul.c.src
index b5204eca5..c8f7c654d 100644
--- a/numpy/core/src/umath/matmul.c.src
+++ b/numpy/core/src/umath/matmul.c.src
@@ -31,7 +31,11 @@
* -1 to be conservative, in case blas internally uses a for loop with an
* inclusive upper bound
*/
+#ifndef HAVE_BLAS_ILP64
#define BLAS_MAXSIZE (NPY_MAX_INT - 1)
+#else
+#define BLAS_MAXSIZE (NPY_MAX_INT64 - 1)
+#endif
/*
* Determine if a 2d matrix can be used by BLAS
@@ -84,25 +88,25 @@ NPY_NO_EXPORT void
* op: data in c order, m shape
*/
enum CBLAS_ORDER order;
- int M, N, lda;
+ CBLAS_INT M, N, lda;
assert(m <= BLAS_MAXSIZE && n <= BLAS_MAXSIZE);
assert (is_blasable2d(is2_n, sizeof(@typ@), n, 1, sizeof(@typ@)));
- M = (int)m;
- N = (int)n;
+ M = (CBLAS_INT)m;
+ N = (CBLAS_INT)n;
if (is_blasable2d(is1_m, is1_n, m, n, sizeof(@typ@))) {
order = CblasColMajor;
- lda = (int)(is1_m / sizeof(@typ@));
+ lda = (CBLAS_INT)(is1_m / sizeof(@typ@));
}
else {
/* If not ColMajor, caller should have ensured we are RowMajor */
/* will not assert in release mode */
order = CblasRowMajor;
assert(is_blasable2d(is1_n, is1_m, n, m, sizeof(@typ@)));
- lda = (int)(is1_n / sizeof(@typ@));
+ lda = (CBLAS_INT)(is1_n / sizeof(@typ@));
}
- cblas_@prefix@gemv(order, CblasTrans, N, M, @step1@, ip1, lda, ip2,
+ CBLAS_FUNC(cblas_@prefix@gemv)(order, CblasTrans, N, M, @step1@, ip1, lda, ip2,
is2_n / sizeof(@typ@), @step0@, op, op_m / sizeof(@typ@));
}
@@ -117,37 +121,37 @@ NPY_NO_EXPORT void
*/
enum CBLAS_ORDER order = CblasRowMajor;
enum CBLAS_TRANSPOSE trans1, trans2;
- int M, N, P, lda, ldb, ldc;
+ CBLAS_INT M, N, P, lda, ldb, ldc;
assert(m <= BLAS_MAXSIZE && n <= BLAS_MAXSIZE && p <= BLAS_MAXSIZE);
- M = (int)m;
- N = (int)n;
- P = (int)p;
+ M = (CBLAS_INT)m;
+ N = (CBLAS_INT)n;
+ P = (CBLAS_INT)p;
assert(is_blasable2d(os_m, os_p, m, p, sizeof(@typ@)));
- ldc = (int)(os_m / sizeof(@typ@));
+ ldc = (CBLAS_INT)(os_m / sizeof(@typ@));
if (is_blasable2d(is1_m, is1_n, m, n, sizeof(@typ@))) {
trans1 = CblasNoTrans;
- lda = (int)(is1_m / sizeof(@typ@));
+ lda = (CBLAS_INT)(is1_m / sizeof(@typ@));
}
else {
/* If not ColMajor, caller should have ensured we are RowMajor */
/* will not assert in release mode */
assert(is_blasable2d(is1_n, is1_m, n, m, sizeof(@typ@)));
trans1 = CblasTrans;
- lda = (int)(is1_n / sizeof(@typ@));
+ lda = (CBLAS_INT)(is1_n / sizeof(@typ@));
}
if (is_blasable2d(is2_n, is2_p, n, p, sizeof(@typ@))) {
trans2 = CblasNoTrans;
- ldb = (int)(is2_n / sizeof(@typ@));
+ ldb = (CBLAS_INT)(is2_n / sizeof(@typ@));
}
else {
/* If not ColMajor, caller should have ensured we are RowMajor */
/* will not assert in release mode */
assert(is_blasable2d(is2_p, is2_n, p, n, sizeof(@typ@)));
trans2 = CblasTrans;
- ldb = (int)(is2_p / sizeof(@typ@));
+ ldb = (CBLAS_INT)(is2_p / sizeof(@typ@));
}
/*
* Use syrk if we have a case of a matrix times its transpose.
@@ -162,12 +166,14 @@ NPY_NO_EXPORT void
) {
npy_intp i,j;
if (trans1 == CblasNoTrans) {
- cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@,
- ip1, lda, @step0@, op, ldc);
+ CBLAS_FUNC(cblas_@prefix@syrk)(
+ order, CblasUpper, trans1, P, N, @step1@,
+ ip1, lda, @step0@, op, ldc);
}
else {
- cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@,
- ip1, ldb, @step0@, op, ldc);
+ CBLAS_FUNC(cblas_@prefix@syrk)(
+ order, CblasUpper, trans1, P, N, @step1@,
+ ip1, ldb, @step0@, op, ldc);
}
/* Copy the triangle */
for (i = 0; i < P; i++) {
@@ -178,8 +184,9 @@ NPY_NO_EXPORT void
}
else {
- cblas_@prefix@gemm(order, trans1, trans2, M, P, N, @step1@, ip1, lda,
- ip2, ldb, @step0@, op, ldc);
+ CBLAS_FUNC(cblas_@prefix@gemm)(
+ order, trans1, trans2, M, P, N, @step1@, ip1, lda,
+ ip2, ldb, @step0@, op, ldc);
}
}