summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorPauli Virtanen <pav@iki.fi>2019-11-29 21:32:44 +0200
committerPauli Virtanen <pav@iki.fi>2019-11-29 21:34:57 +0200
commitacf8bcb04408811e6c0c72c4a4101505a14e59db (patch)
tree4667fe2c5d8a9c29a610e1363c814d4897a43b7b /numpy
parent2d75dec5ae997c2b089232b96e4a64b1065e54f5 (diff)
downloadnumpy-acf8bcb04408811e6c0c72c4a4101505a14e59db.tar.gz
ENH: core: use symbol-suffixed 64-bit CBLAS in cblasfuncs, when available
When Numpy is compiled with BLAS64_ enabled, use the 64-bit CBLAS routines to provide cblasfuncs, and dot() et al.
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/common/cblasfuncs.c86
1 files changed, 50 insertions, 36 deletions
diff --git a/numpy/core/src/common/cblasfuncs.c b/numpy/core/src/common/cblasfuncs.c
index 39572fed4..14d13a6c7 100644
--- a/numpy/core/src/common/cblasfuncs.c
+++ b/numpy/core/src/common/cblasfuncs.c
@@ -10,10 +10,20 @@
#include <assert.h>
#include <numpy/arrayobject.h>
#include "npy_cblas.h"
+#include "npy_cblas64_.h"
#include "arraytypes.h"
#include "common.h"
+/*
+ * If 64-bit CBLAS with symbol suffix '64_' is available, use it.
+ */
+#ifdef HAVE_CBLAS64_
+#define CBLAS_FUNC(name) name ## 64_
+#else
+#define CBLAS_FUNC(name) name
+#endif
+
static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};
@@ -24,28 +34,28 @@ static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};
static void
gemm(int typenum, enum CBLAS_ORDER order,
enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB,
- int m, int n, int k,
- PyArrayObject *A, int lda, PyArrayObject *B, int ldb, PyArrayObject *R)
+ npy_intp m, npy_intp n, npy_intp k,
+ PyArrayObject *A, npy_intp lda, PyArrayObject *B, npy_intp ldb, PyArrayObject *R)
{
const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B);
void *Rdata = PyArray_DATA(R);
- int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
+ npy_intp ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
switch (typenum) {
case NPY_DOUBLE:
- cblas_dgemm(order, transA, transB, m, n, k, 1.,
+ CBLAS_FUNC(cblas_dgemm)(order, transA, transB, m, n, k, 1.,
Adata, lda, Bdata, ldb, 0., Rdata, ldc);
break;
case NPY_FLOAT:
- cblas_sgemm(order, transA, transB, m, n, k, 1.f,
+ CBLAS_FUNC(cblas_sgemm)(order, transA, transB, m, n, k, 1.f,
Adata, lda, Bdata, ldb, 0.f, Rdata, ldc);
break;
case NPY_CDOUBLE:
- cblas_zgemm(order, transA, transB, m, n, k, oneD,
+ CBLAS_FUNC(cblas_zgemm)(order, transA, transB, m, n, k, oneD,
Adata, lda, Bdata, ldb, zeroD, Rdata, ldc);
break;
case NPY_CFLOAT:
- cblas_cgemm(order, transA, transB, m, n, k, oneF,
+ CBLAS_FUNC(cblas_cgemm)(order, transA, transB, m, n, k, oneF,
Adata, lda, Bdata, ldb, zeroF, Rdata, ldc);
break;
}
@@ -57,29 +67,29 @@ gemm(int typenum, enum CBLAS_ORDER order,
*/
static void
gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
- PyArrayObject *A, int lda, PyArrayObject *X, int incX,
+ PyArrayObject *A, npy_intp lda, PyArrayObject *X, npy_intp incX,
PyArrayObject *R)
{
const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X);
void *Rdata = PyArray_DATA(R);
- int m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1);
+ npy_intp m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1);
switch (typenum) {
case NPY_DOUBLE:
- cblas_dgemv(order, trans, m, n, 1., Adata, lda, Xdata, incX,
+ CBLAS_FUNC(cblas_dgemv)(order, trans, m, n, 1., Adata, lda, Xdata, incX,
0., Rdata, 1);
break;
case NPY_FLOAT:
- cblas_sgemv(order, trans, m, n, 1.f, Adata, lda, Xdata, incX,
+ CBLAS_FUNC(cblas_sgemv)(order, trans, m, n, 1.f, Adata, lda, Xdata, incX,
0.f, Rdata, 1);
break;
case NPY_CDOUBLE:
- cblas_zgemv(order, trans, m, n, oneD, Adata, lda, Xdata, incX,
+ CBLAS_FUNC(cblas_zgemv)(order, trans, m, n, oneD, Adata, lda, Xdata, incX,
zeroD, Rdata, 1);
break;
case NPY_CFLOAT:
- cblas_cgemv(order, trans, m, n, oneF, Adata, lda, Xdata, incX,
+ CBLAS_FUNC(cblas_cgemv)(order, trans, m, n, oneF, Adata, lda, Xdata, incX,
zeroF, Rdata, 1);
break;
}
@@ -91,19 +101,19 @@ gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
*/
static void
syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
- int n, int k,
- PyArrayObject *A, int lda, PyArrayObject *R)
+ npy_intp n, npy_intp k,
+ PyArrayObject *A, npy_intp lda, PyArrayObject *R)
{
const void *Adata = PyArray_DATA(A);
void *Rdata = PyArray_DATA(R);
- int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
+ npy_intp ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
npy_intp i;
npy_intp j;
switch (typenum) {
case NPY_DOUBLE:
- cblas_dsyrk(order, CblasUpper, trans, n, k, 1.,
+ CBLAS_FUNC(cblas_dsyrk)(order, CblasUpper, trans, n, k, 1.,
Adata, lda, 0., Rdata, ldc);
for (i = 0; i < n; i++) {
@@ -114,7 +124,7 @@ syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
}
break;
case NPY_FLOAT:
- cblas_ssyrk(order, CblasUpper, trans, n, k, 1.f,
+ CBLAS_FUNC(cblas_ssyrk)(order, CblasUpper, trans, n, k, 1.f,
Adata, lda, 0.f, Rdata, ldc);
for (i = 0; i < n; i++) {
@@ -125,7 +135,7 @@ syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
}
break;
case NPY_CDOUBLE:
- cblas_zsyrk(order, CblasUpper, trans, n, k, oneD,
+ CBLAS_FUNC(cblas_zsyrk)(order, CblasUpper, trans, n, k, oneD,
Adata, lda, zeroD, Rdata, ldc);
for (i = 0; i < n; i++) {
@@ -136,7 +146,7 @@ syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
}
break;
case NPY_CFLOAT:
- cblas_csyrk(order, CblasUpper, trans, n, k, oneF,
+ CBLAS_FUNC(cblas_csyrk)(order, CblasUpper, trans, n, k, oneF,
Adata, lda, zeroF, Rdata, ldc);
for (i = 0; i < n; i++) {
@@ -222,7 +232,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
PyArrayObject *out)
{
PyArrayObject *result = NULL, *out_buf = NULL;
- int j, lda, ldb;
+ npy_intp j, lda, ldb;
npy_intp l;
int nd;
npy_intp ap1stride = 0;
@@ -385,14 +395,15 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
*((double *)PyArray_DATA(ap1));
}
else if (ap1shape != _matrix) {
- cblas_daxpy(l,
+ CBLAS_FUNC(cblas_daxpy)(l,
*((double *)PyArray_DATA(ap2)),
(double *)PyArray_DATA(ap1),
ap1stride/sizeof(double),
(double *)PyArray_DATA(out_buf), 1);
}
else {
- int maxind, oind, i, a1s, outs;
+ int maxind, oind;
+ npy_intp i, a1s, outs;
char *ptr, *optr;
double val;
@@ -405,7 +416,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
a1s = PyArray_STRIDE(ap1, maxind) / sizeof(double);
outs = PyArray_STRIDE(out_buf, maxind) / sizeof(double);
for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
- cblas_daxpy(l, val, (double *)ptr, a1s,
+ CBLAS_FUNC(cblas_daxpy)(l, val, (double *)ptr, a1s,
(double *)optr, outs);
ptr += PyArray_STRIDE(ap1, oind);
optr += PyArray_STRIDE(out_buf, oind);
@@ -423,14 +434,15 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
}
else if (ap1shape != _matrix) {
- cblas_zaxpy(l,
+ CBLAS_FUNC(cblas_zaxpy)(l,
(double *)PyArray_DATA(ap2),
(double *)PyArray_DATA(ap1),
ap1stride/sizeof(npy_cdouble),
(double *)PyArray_DATA(out_buf), 1);
}
else {
- int maxind, oind, i, a1s, outs;
+ int maxind, oind;
+ npy_intp i, a1s, outs;
char *ptr, *optr;
double *pval;
@@ -443,7 +455,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cdouble);
outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cdouble);
for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
- cblas_zaxpy(l, pval, (double *)ptr, a1s,
+ CBLAS_FUNC(cblas_zaxpy)(l, pval, (double *)ptr, a1s,
(double *)optr, outs);
ptr += PyArray_STRIDE(ap1, oind);
optr += PyArray_STRIDE(out_buf, oind);
@@ -456,14 +468,15 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
*((float *)PyArray_DATA(ap1));
}
else if (ap1shape != _matrix) {
- cblas_saxpy(l,
+ CBLAS_FUNC(cblas_saxpy)(l,
*((float *)PyArray_DATA(ap2)),
(float *)PyArray_DATA(ap1),
ap1stride/sizeof(float),
(float *)PyArray_DATA(out_buf), 1);
}
else {
- int maxind, oind, i, a1s, outs;
+ int maxind, oind;
+ npy_intp i, a1s, outs;
char *ptr, *optr;
float val;
@@ -476,7 +489,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
a1s = PyArray_STRIDE(ap1, maxind) / sizeof(float);
outs = PyArray_STRIDE(out_buf, maxind) / sizeof(float);
for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
- cblas_saxpy(l, val, (float *)ptr, a1s,
+ CBLAS_FUNC(cblas_saxpy)(l, val, (float *)ptr, a1s,
(float *)optr, outs);
ptr += PyArray_STRIDE(ap1, oind);
optr += PyArray_STRIDE(out_buf, oind);
@@ -494,14 +507,15 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
}
else if (ap1shape != _matrix) {
- cblas_caxpy(l,
+ CBLAS_FUNC(cblas_caxpy)(l,
(float *)PyArray_DATA(ap2),
(float *)PyArray_DATA(ap1),
ap1stride/sizeof(npy_cfloat),
(float *)PyArray_DATA(out_buf), 1);
}
else {
- int maxind, oind, i, a1s, outs;
+ int maxind, oind;
+ npy_intp i, a1s, outs;
char *ptr, *optr;
float *pval;
@@ -514,7 +528,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cfloat);
outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cfloat);
for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
- cblas_caxpy(l, pval, (float *)ptr, a1s,
+ CBLAS_FUNC(cblas_caxpy)(l, pval, (float *)ptr, a1s,
(float *)optr, outs);
ptr += PyArray_STRIDE(ap1, oind);
optr += PyArray_STRIDE(out_buf, oind);
@@ -537,7 +551,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
/* Matrix vector multiplication -- Level 2 BLAS */
/* lda must be MAX(M,1) */
enum CBLAS_ORDER Order;
- int ap2s;
+ npy_intp ap2s;
if (!PyArray_ISONESEGMENT(ap1)) {
PyObject *new;
@@ -564,7 +578,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
else if (ap1shape != _matrix && ap2shape == _matrix) {
/* Vector matrix multiplication -- Level 2 BLAS */
enum CBLAS_ORDER Order;
- int ap1s;
+ npy_intp ap1s;
if (!PyArray_ISONESEGMENT(ap2)) {
PyObject *new;
@@ -601,7 +615,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
*/
enum CBLAS_ORDER Order;
enum CBLAS_TRANSPOSE Trans1, Trans2;
- int M, N, L;
+ npy_intp M, N, L;
/* Optimization possible: */
/*