diff options
-rw-r--r-- | scipy/base/code_generators/generate_array_api.py | 4 | ||||
-rw-r--r-- | scipy/base/include/scipy/arrayobject.h | 26 | ||||
-rw-r--r-- | scipy/base/src/arraymethods.c | 6 | ||||
-rw-r--r-- | scipy/base/src/arrayobject.c | 159 | ||||
-rw-r--r-- | scipy/base/src/arraytypes.inc.src | 10 | ||||
-rw-r--r-- | scipy/base/src/multiarraymodule.c | 6 | ||||
-rw-r--r-- | scipy/corelib/blasdot/_dotblas.c | 1268 |
7 files changed, 805 insertions, 674 deletions
diff --git a/scipy/base/code_generators/generate_array_api.py b/scipy/base/code_generators/generate_array_api.py index 1f6460cde..277f2542c 100644 --- a/scipy/base/code_generators/generate_array_api.py +++ b/scipy/base/code_generators/generate_array_api.py @@ -60,6 +60,10 @@ objectapi_list = [ """, 'ObjectType','PyObject *, int','int'), + (r""" + """, + 'ArrayType','PyObject *, PyArray_Typecode *, PyArray_Typecode *','void'), + (r"""Return type typecode from array scalar. """, 'TypecodeFromScalar','PyObject *, PyArray_Typecode *','void'), diff --git a/scipy/base/include/scipy/arrayobject.h b/scipy/base/include/scipy/arrayobject.h index 7136f5c0b..ce6a000b7 100644 --- a/scipy/base/include/scipy/arrayobject.h +++ b/scipy/base/include/scipy/arrayobject.h @@ -680,33 +680,23 @@ typedef Py_uintptr_t uintp; #define PyDimMem_RENEW(ptr,size) ((intp *)realloc(ptr,size*sizeof(intp))) -struct PyArrayObject; /* Forward */ - -typedef PyObject *(PyArray_UnaryFunc)(struct PyArrayObject *, PyObject *); -typedef PyObject *(PyArray_BinaryFunc)(struct PyArrayObject *, PyObject *, \ - PyObject *); - /* These must deal with unaligned and unbyteswapped data if necessary */ -typedef PyObject * (PyArray_GetItemFunc) (char *, struct PyArrayObject *); -typedef int (PyArray_SetItemFunc)(PyObject *, char *, struct PyArrayObject *); +typedef PyObject * (PyArray_GetItemFunc) (char *, void *); +typedef int (PyArray_SetItemFunc)(PyObject *, char *, void *); -typedef int (PyArray_CompareFunc)(const void *, const void *, - struct PyArrayObject *); +typedef int (PyArray_CompareFunc)(const void *, const void *, void *); typedef int (PyArray_CopySwapNFunc)(void *, void *, intp, int, int); typedef int (PyArray_CopySwapFunc)(void *, void *, int, int); -typedef int (PyArray_NonzeroFunc)(void *, struct PyArrayObject *); +typedef int (PyArray_NonzeroFunc)(void *, void *); /* These assume aligned and byteswapped data -- a buffer will be used before or contiguous data will be obtained */ -typedef int (PyArray_ArgFunc)(void*, intp, intp*, struct PyArrayObject *); -typedef int (PyArray_DotFunc)(char *, int, char *, int, - char *, int, struct PyArrayObject *); -typedef int (PyArray_VectorUnaryFunc)(void *, void *, intp, - struct PyArrayObject *, - struct PyArrayObject *); -typedef int (PyArray_ScanFunc)(FILE *, void *, int, char *); +typedef int (PyArray_ArgFunc)(void*, intp, intp*, void *); +typedef int (PyArray_DotFunc)(char *, int, char *, int, char *, int, void *); +typedef int (PyArray_VectorUnaryFunc)(void *, void *, intp, void *, void *); +typedef int (PyArray_ScanFunc)(FILE *, void *, int, char *, void *); typedef struct { diff --git a/scipy/base/src/arraymethods.c b/scipy/base/src/arraymethods.c index 6cdd4b454..2826dc807 100644 --- a/scipy/base/src/arraymethods.c +++ b/scipy/base/src/arraymethods.c @@ -737,7 +737,7 @@ array_setstate(PyArrayObject *self, PyObject *args) if (typecode.type_num != PyArray_OBJECT) { self->data = datastr; - self->base = PyTuple_GET_ITEM(PyTuple_GET_ITEM(args, 0), 3); + self->base = rawdata; Py_INCREF(self->base); } else { @@ -777,7 +777,7 @@ PyArray_Dump(PyObject *self, PyObject *file, int protocol) if (file==NULL) return -1; } else Py_INCREF(file); - ret = PyObject_CallMethod(cpick, "dump", "NNi", self, + ret = PyObject_CallMethod(cpick, "dump", "OOi", self, file, protocol); Py_XDECREF(ret); Py_DECREF(file); @@ -795,7 +795,7 @@ PyArray_Dumps(PyObject *self, int protocol) cpick = PyImport_ImportModule("cPickle"); if (cpick==NULL) return NULL; - ret = PyObject_CallMethod(cpick, "dumps", "Ni", self, protocol); + ret = PyObject_CallMethod(cpick, "dumps", "Oi", self, protocol); Py_DECREF(cpick); return ret; } diff --git a/scipy/base/src/arrayobject.c b/scipy/base/src/arrayobject.c index 7e90358da..05dfc35ed 100644 --- a/scipy/base/src/arrayobject.c +++ b/scipy/base/src/arrayobject.c @@ -4309,86 +4309,163 @@ discover_dimensions(PyObject *s, int nd, intp *d, int check_it) return 0; } +static void +_array_small_type(int chktype, int mintype, int chksize, int minsize, + PyArray_Typecode *outtype) +{ + outtype->type_num = MAX(chktype, mintype); + if (PyTypeNum_ISFLEXIBLE(outtype->type_num) && \ + PyTypeNum_ISFLEXIBLE(mintype)) { + /* Handle string->unicode case separately + because string itemsize is twice as large */ + if (outtype->type_num == PyArray_UNICODE && + mintype == PyArray_STRING) { + outtype->itemsize = MAX(chksize, 2*minsize); + } + else { + outtype->itemsize = MAX(chksize, minsize); + } + } + else { + outtype->itemsize = chksize; + } + return; +} -static int -array_objecttype(PyObject *op, int minimum_type, int max) +static void +_array_find_type(PyObject *op, PyArray_Typecode *minitype, + PyArray_Typecode *outtype, int max) { int l; PyObject *ip; - int result; - - if (minimum_type == -1) return -1; + int chktype=0; + int chksize=0; + int mintype, minsize; + + if (minitype == NULL) { + mintype = PyArray_BOOL; + minsize = sizeof(bool); + } + else { + mintype = minitype->type_num; + minsize = minitype->itemsize; + } - if (max < 0) return PyArray_OBJECT; + + if (max < 0 || mintype == -1) goto deflt; - if (PyArray_Check(op)) - return MAX(PyArray_TYPE(op), minimum_type); + if (PyArray_Check(op)) { + chktype = PyArray_TYPE(op); + chksize = PyArray_ITEMSIZE(op); + goto finish; + } + + if (PyArray_IsScalar(op, Generic)) { + PyArray_TypecodeFromScalar(op, outtype); + chktype = outtype->type_num; + chksize = outtype->itemsize; + goto finish; + } + if (PyObject_HasAttrString(op, "__array__")) { ip = PyObject_CallMethod(op, "__array__", NULL); - if(ip != NULL) { - result = MAX(minimum_type, PyArray_TYPE(ip)); - Py_DECREF(ip); - return result; + if(ip && PyArray_Check(ip)) { + chktype = PyArray_TYPE(ip); + chksize = PyArray_ITEMSIZE(ip); + goto finish; } } - + if (PyObject_HasAttrString(op, "__array_typestr__")) { - PyArray_Typecode type = {PyArray_NOTYPE, 0, 0}; int swap=0, res; ip = PyObject_GetAttrString(op, "__array_typestr__"); if (ip && PyString_Check(ip)) { res = _array_typecode_fromstr(PyString_AS_STRING(ip), - &swap, &type); + &swap, outtype); if (res >= 0) { Py_DECREF(ip); - return MAX(minimum_type, type.type_num); + chktype = outtype->type_num; + chksize = outtype->itemsize; + goto finish; } } Py_XDECREF(ip); } - + if (PyString_Check(op)) { - return MAX(minimum_type, (int)PyArray_STRING); + chktype = PyArray_STRING; + chksize = PyString_GET_SIZE(op); + goto finish; } if (PyUnicode_Check(op)) { - return MAX(minimum_type, (int)PyArray_UNICODE); + chktype = PyArray_UNICODE; + chksize = PyUnicode_GET_DATA_SIZE(op); + goto finish; } - if (PyInstance_Check(op)) return PyArray_OBJECT; + if (PyBuffer_Check(op)) { + chktype = PyArray_VOID; + chksize = op->ob_type->tp_as_sequence->sq_length(op); + PyErr_Clear(); + goto finish; + } + if (PyInstance_Check(op)) goto deflt; + if (PySequence_Check(op)) { + PyArray_Typecode newtype = {mintype, minsize, 0}; l = PyObject_Length(op); if (l < 0 && PyErr_Occurred()) { PyErr_Clear(); - return (int)PyArray_OBJECT; + goto deflt; + } + if (l == 0 && mintype == 0) { + newtype.type_num = PyArray_INTP; + newtype.itemsize = sizeof(intp); } - if (l == 0 && minimum_type == 0) - minimum_type = PyArray_INTP; while (--l >= 0) { ip = PySequence_GetItem(op, l); if (ip==NULL) { PyErr_Clear(); - return (int) PyArray_OBJECT; + goto deflt; } - minimum_type = array_objecttype(ip, minimum_type, - max-1); + _array_find_type(ip, &newtype, outtype, max-1); + _array_small_type(outtype->type_num, + newtype.type_num, + outtype->itemsize, + newtype.itemsize, + &newtype); Py_DECREF(ip); } - return minimum_type; + chktype = newtype.type_num; + chksize = newtype.itemsize; + goto finish; } - if (PyInt_Check(op) || PyLong_Check(op)) { - return MAX(minimum_type, (int) (PyArray_INTP)); + if (PyInt_Check(op)) { + chktype = PyArray_LONG; + chksize = sizeof(long); + goto finish; } else if (PyFloat_Check(op)) { - return MAX(minimum_type, (int) (PyArray_DOUBLE)); + chktype = PyArray_DOUBLE; + chksize = sizeof(double); + goto finish; } else if (PyComplex_Check(op)) { - return MAX(minimum_type, - (int)(PyArray_CDOUBLE)); - } else { - return (int)PyArray_OBJECT; + chktype = PyArray_CDOUBLE; + chksize = sizeof(cdouble); + goto finish; } + + deflt: + chktype = PyArray_OBJECT; + chksize = sizeof(void *); + + finish: + _array_small_type(chktype, mintype, chksize, minsize, + outtype); + return; } static int @@ -5059,7 +5136,7 @@ array_fromobject(PyObject *op, PyArray_Typecode *typecode, int min_depth, } else { if (type == PyArray_NOTYPE) { - typecode->type_num = array_objecttype(op, 0, MAX_DIMS); + _array_find_type(op, NULL, typecode, MAX_DIMS); } if (PySequence_Check(op)) r = Array_FromSequence(op, typecode, @@ -5096,11 +5173,21 @@ array_fromobject(PyObject *op, PyArray_Typecode *typecode, int min_depth, return r; } +static void +PyArray_ArrayType(PyObject *op, PyArray_Typecode *intype, + PyArray_Typecode *outtype) +{ + _array_find_type(op, intype, outtype, MAX_DIMS); + return; +} static int PyArray_ObjectType(PyObject *op, int minimum_type) { - return array_objecttype(op, minimum_type, MAX_DIMS); + PyArray_Typecode intype, outtype; + intype.type_num = minimum_type; + _array_find_type(op, &intype, &outtype, MAX_DIMS); + return outtype.type_num; } diff --git a/scipy/base/src/arraytypes.inc.src b/scipy/base/src/arraytypes.inc.src index ebcb2b22e..d2fc04129 100644 --- a/scipy/base/src/arraytypes.inc.src +++ b/scipy/base/src/arraytypes.inc.src @@ -531,7 +531,7 @@ static void #format="%hd","%hu","%d","%u","%ld","%lu",LONGLONG_FMT,ULONGLONG_FMT,"%f","%lf","%Lf"# */ static int -@fname@_scan (FILE *fp, @type@ *ip, int itemsize, char *sep) +@fname@_scan (FILE *fp, @type@ *ip, int itemsize, char *sep, PyArrayObject *ao) { int endfile; fscanf(fp, @format@, ip); @@ -550,7 +550,7 @@ static int #fname=BOOL,BYTE,UBYTE,CFLOAT,CDOUBLE,CLONGDOUBLE,OBJECT,STRING,UNICODE,VOID# */ static int -@fname@_scan (FILE *fp, void *ip, int itemsize, char *sep) +@fname@_scan (FILE *fp, void *ip, int itemsize, char *sep, PyArrayObject *ao) { PyErr_SetString(PyExc_IOError, "scan not supported for this type"); return -1; @@ -1415,7 +1415,8 @@ static PyArray_Descr @from@_Descr = { { (PyArray_VectorUnaryFunc*)@from@_to_UNICODE, (PyArray_VectorUnaryFunc*)@from@_to_VOID }, - @from@_getitem, @from@_setitem, + (PyArray_GetItemFunc*)@from@_getitem, + (PyArray_SetItemFunc*)@from@_setitem, (PyArray_CompareFunc*)@from@_compare, (PyArray_ArgFunc*)@from@_argmax, (PyArray_DotFunc*)NULL, @@ -1463,7 +1464,8 @@ static PyArray_Descr @from@_Descr = { { (PyArray_VectorUnaryFunc*)@from@_to_UNICODE, (PyArray_VectorUnaryFunc*)@from@_to_VOID }, - @from@_getitem, @from@_setitem, + (PyArray_GetItemFunc*)@from@_getitem, + (PyArray_SetItemFunc*)@from@_setitem, (PyArray_CompareFunc*)@from@_compare, (PyArray_ArgFunc*)@from@_argmax, (PyArray_DotFunc*)@from@_dot, diff --git a/scipy/base/src/multiarraymodule.c b/scipy/base/src/multiarraymodule.c index f2ac60fc7..0ddf75035 100644 --- a/scipy/base/src/multiarraymodule.c +++ b/scipy/base/src/multiarraymodule.c @@ -3242,7 +3242,7 @@ PyArray_FromFile(FILE *fp, PyArray_Typecode *typecode, intp num, char *sep) } dptr = r->data; for (i=0; i < num-1; i++) { - if (scan(fp, dptr, r->itemsize, sep)) + if (scan(fp, dptr, r->itemsize, sep, NULL)) break; nread += 1; dptr += r->itemsize; @@ -3251,7 +3251,7 @@ PyArray_FromFile(FILE *fp, PyArray_Typecode *typecode, intp num, char *sep) Py_DECREF(r); return NULL; } - if (!(scan(fp, dptr, r->itemsize, NULL))) + if (!(scan(fp, dptr, r->itemsize, NULL, NULL))) nread += 1; } else { /* we have to watch for the end of the file and @@ -3281,7 +3281,7 @@ PyArray_FromFile(FILE *fp, PyArray_Typecode *typecode, intp num, char *sep) return NULL; } while (!done) { - done = scan(fp, dptr, r->itemsize, sep); + done = scan(fp, dptr, r->itemsize, sep, NULL); /* end of file reached trying to scan value */ diff --git a/scipy/corelib/blasdot/_dotblas.c b/scipy/corelib/blasdot/_dotblas.c index 51d3184b5..b7d25c098 100644 --- a/scipy/corelib/blasdot/_dotblas.c +++ b/scipy/corelib/blasdot/_dotblas.c @@ -3,7 +3,7 @@ static char module_doc[] = #include "Python.h" -#include "Numeric/arrayobject.h" +#include "scipy/arrayobject.h" #ifndef CBLAS_HEADER #define CBLAS_HEADER <cblas.h> #endif @@ -11,710 +11,758 @@ static char module_doc[] = #include <stdio.h> -/* Defined to be the same as MAX_DIMS in multiarray */ -#define MAX_DIMS 40 - - - -static void FLOAT_dot(void *a, int stridea, void *b, int strideb, void *res, int n) +static void FLOAT_dot(void *a, int stridea, void *b, int strideb, void *res, int n, void *tmp) { - *((float *)res) = cblas_sdot(n, (float *)a, stridea, (float *)b, strideb); + *((float *)res) = cblas_sdot(n, (float *)a, stridea, (float *)b, strideb); } -static void DOUBLE_dot(void *a, int stridea, void *b, int strideb, void *res, int n) +static void DOUBLE_dot(void *a, int stridea, void *b, int strideb, void *res, int n, void *tmp) { - *((double *)res) = cblas_ddot(n, (double *)a, stridea, (double *)b, strideb); + *((double *)res) = cblas_ddot(n, (double *)a, stridea, (double *)b, strideb); } -static void CFLOAT_dot(void *a, int stridea, void *b, int strideb, void *res, int n) +static void CFLOAT_dot(void *a, int stridea, void *b, int strideb, void *res, int n, void *tmp) { - cblas_cdotu_sub(n, (double *)a, stridea, (double *)b, strideb, - (double *)res); + cblas_cdotu_sub(n, (double *)a, stridea, (double *)b, strideb, + (double *)res); } -static void CDOUBLE_dot(void *a, int stridea, void *b, int strideb, void *res, int n) +static void CDOUBLE_dot(void *a, int stridea, void *b, int strideb, void *res, int n, void *tmp) { - cblas_zdotu_sub(n, (double *)a, stridea, (double *)b, strideb, - (double *)res); + cblas_zdotu_sub(n, (double *)a, stridea, (double *)b, strideb, + (double *)res); } +static PyArray_DotFunc *oldFunctions[PyArray_NTYPES]; +static bool altered=false; +static char doc_alterdot[] = "alterdot() changes all dot functions to use blas."; -typedef void (DotFunction)(void *, int, void *, int, void *, int); - -static DotFunction *dotFunctions[PyArray_NTYPES]; +static PyObject * +dotblas_alterdot(PyObject *dummy, PyObject *args) +{ + PyArray_Descr *descr; + + if (!PyArg_ParseTuple(args, "")) return NULL; + /* Replace the dot functions to the ones using blas */ -static char doc_matrixproduct[] = "matrixproduct(a,b)\nReturns the dot product of a and b for arrays of floating point types.\nLike the generic Numeric equivalent the product sum is over\nthe last dimension of a and the second-to-last dimension of b.\nNB: The first argument is not conjugated."; + if (!altered) { + descr = PyArray_DescrFromType(PyArray_FLOAT); + oldFunctions[PyArray_FLOAT] = descr->dotfunc; + descr->dotfunc = FLOAT_dot; + + descr = PyArray_DescrFromType(PyArray_DOUBLE); + oldFunctions[PyArray_DOUBLE] = descr->dotfunc; + descr->dotfunc = DOUBLE_dot; + + descr = PyArray_DescrFromType(PyArray_CFLOAT); + oldFunctions[PyArray_CFLOAT] = descr->dotfunc; + descr->dotfunc = CFLOAT_dot; + + descr = PyArray_DescrFromType(PyArray_CDOUBLE); + oldFunctions[PyArray_CDOUBLE] = descr->dotfunc; + descr->dotfunc = CDOUBLE_dot; + altered = true; + } -static PyObject *dotblas_matrixproduct(PyObject *dummy, PyObject *args) { - PyObject *op1, *op2; - PyArrayObject *ap1, *ap2, *ret; - int i, j, l, lda, ldb, matchDim = -1, otherDim = -1; - int typenum; - int dimensions[MAX_DIMS], nd; - static const float oneF[2] = {1.0, 0.0}; - static const float zeroF[2] = {0.0, 0.0}; - static const double oneD[2] = {1.0, 0.0}; - static const double zeroD[2] = {0.0, 0.0}; + Py_INCREF(Py_None); + return Py_None; +} +static char doc_restoredot[] = "restoredot() restores dots to defaults."; - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; - - /* - * "Matrix product" using the BLAS. - * Only works for float double and complex types. - */ +static PyObject * +dotblas_restoredot(PyObject *dummy, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "")) return NULL; - typenum = PyArray_ObjectType(op1, 0); - typenum = PyArray_ObjectType(op2, typenum); + if (altered) { + descr->dotfunc = oldFunctions[PyArray_FLOAT]; + oldFunctions[PyArray_FLOAT] = NULL; - ret = NULL; - ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); - if (ap1 == NULL) return NULL; - ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); - if (ap2 == NULL) goto fail; + descr->dotfunc = oldFunctions[PyArray_DOUBLE]; + oldFunctions[PyArray_DOUBLE] = NULL; - if (typenum != PyArray_FLOAT && typenum != PyArray_DOUBLE && - typenum != PyArray_CFLOAT && typenum != PyArray_CDOUBLE) { - PyErr_SetString(PyExc_TypeError, "at least one argument must be (possibly complex) float or double"); - goto fail; - } + descr->dotfunc = oldFunctions[PyArray_CFLOAT]; + oldFunctions[PyArray_CFLOAT] = NULL; - - if (ap1->nd < 0 || ap2->nd < 0) { - PyErr_SetString(PyExc_TypeError, "negative dimensioned arrays!"); - goto fail; - } - - if (ap1->nd == 0 || ap2->nd == 0) { - /* One of ap1 or ap2 is a scalar */ - if (ap1->nd == 0) { /* Make ap2 the scalar */ - PyArrayObject *t = ap1; - ap1 = ap2; - ap2 = t; - } - for (l = 1, j = 0; j < ap1->nd; j++) { - dimensions[j] = ap1->dimensions[j]; - l *= dimensions[j]; - } - nd = ap1->nd; - } - else if (ap1->nd <= 2 && ap2->nd <= 2) { - /* Both ap1 and ap2 are vectors or matrices */ - l = ap1->dimensions[ap1->nd-1]; + descr->dotfunc = oldFunctions[PyArray_CDOUBLE]; + oldFunctions[PyArray_CDOUBLE] = NULL; - if (ap2->dimensions[0] != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); - goto fail; - } - nd = ap1->nd+ap2->nd-2; - - if (nd == 1) - dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[1]; - else if (nd == 2) { - dimensions[0] = ap1->dimensions[0]; - dimensions[1] = ap2->dimensions[1]; - } - } - else { - /* At least one of ap1 or ap2 has dimension > 2. */ - l = ap1->dimensions[ap1->nd-1]; - matchDim = ap2->nd - 2; - otherDim = ap2->nd - 1; - - if (ap2->dimensions[matchDim] != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); - goto fail; + altered = false; } - nd = ap1->nd+ap2->nd-2; - j = 0; - for(i=0; i<ap1->nd-1; i++) { - dimensions[j++] = ap1->dimensions[i]; - } - for(i=0; i<ap2->nd-2; i++) { - dimensions[j++] = ap2->dimensions[i]; - } - if(ap2->nd > 1) { - dimensions[j++] = ap2->dimensions[ap2->nd-1]; - } - } + Py_INCREF(Py_None); + return Py_None; +} + - ret = (PyArrayObject *)PyArray_FromDims(nd, dimensions, typenum); - if (ret == NULL) goto fail; +static char doc_matrixproduct[] = "matrixproduct(a,b)\nReturns the dot product of a and b for arrays of floating point types.\nLike the generic Numeric equivalent the product sum is over\nthe last dimension of a and the second-to-last dimension of b.\nNB: The first argument is not conjugated."; - if (ap2->nd == 0) { - /* Multiplication by a scalar -- Level 1 BLAS */ - if (typenum == PyArray_DOUBLE) { - cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data, 1, - (double *)ret->data, 1); - } - else if (typenum == PyArray_FLOAT) { - cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data, 1, - (float *)ret->data, 1); - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data, 1, - (double *)ret->data, 1); - } - else if (typenum == PyArray_CFLOAT) { - cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data, 1, - (float *)ret->data, 1); - } - } - else if (ap1->nd == 1 && ap2->nd == 1) { - /* Dot product between two vectors -- Level 1 BLAS */ - if (typenum == PyArray_DOUBLE) { - double result = cblas_ddot(l, (double *)ap1->data, 1, - (double *)ap2->data, 1); - *((double *)ret->data) = result; - } - else if (typenum == PyArray_FLOAT) { - float result = cblas_sdot(l, (float *)ap1->data, 1, - (float *)ap2->data, 1); - *((float *)ret->data) = result; - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zdotu_sub(l, (double *)ap1->data, 1, - (double *)ap2->data, 1, (double *)ret->data); - } - else if (typenum == PyArray_CFLOAT) { - cblas_cdotu_sub(l, (float *)ap1->data, 1, - (float *)ap2->data, 1, (float *)ret->data); - } - } - else if (ap1->nd == 2 && ap2->nd == 1) { - /* Matrix vector multiplication -- Level 2 BLAS */ - /* lda must be MAX(M,1) */ - lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); - if (typenum == PyArray_DOUBLE) { - cblas_dgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - 1.0, (double *)ap1->data, lda, - (double *)ap2->data, 1, 0.0, (double *)ret->data, 1); - } - else if (typenum == PyArray_FLOAT) { - cblas_sgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - 1.0, (float *)ap1->data, lda, - (float *)ap2->data, 1, 0.0, (float *)ret->data, 1); - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - oneD, (double *)ap1->data, lda, - (double *)ap2->data, 1, zeroD, (double *)ret->data, 1); - } - else if (typenum == PyArray_CFLOAT) { - cblas_cgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - oneF, (float *)ap1->data, lda, - (float *)ap2->data, 1, zeroF, (float *)ret->data, 1); - } - } - else if (ap1->nd == 1 && ap2->nd == 2) { - /* Vector matrix multiplication -- Level 2 BLAS */ - lda = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); - if (typenum == PyArray_DOUBLE) { - cblas_dgemv(CblasRowMajor, - CblasTrans, ap2->dimensions[0], ap2->dimensions[1], - 1.0, (double *)ap2->data, lda, - (double *)ap1->data, 1, 0.0, (double *)ret->data, 1); - } - else if (typenum == PyArray_FLOAT) { - cblas_sgemv(CblasRowMajor, - CblasTrans, ap2->dimensions[0], ap2->dimensions[1], - 1.0, (float *)ap2->data, lda, - (float *)ap1->data, 1, 0.0, (float *)ret->data, 1); - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zgemv(CblasRowMajor, - CblasTrans, ap2->dimensions[0], ap2->dimensions[1], - oneD, (double *)ap2->data, lda, - (double *)ap1->data, 1, zeroD, (double *)ret->data, 1); - } - else if (typenum == PyArray_CFLOAT) { - cblas_cgemv(CblasRowMajor, - CblasTrans, ap2->dimensions[0], ap2->dimensions[1], - oneF, (float *)ap2->data, lda, - (float *)ap1->data, 1, zeroF, (float *)ret->data, 1); - } - } - else if (ap1->nd == 2 && ap2->nd == 2) { - /* Matrix matrix multiplication -- Level 3 BLAS */ - lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); - ldb = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); - if (typenum == PyArray_DOUBLE) { - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, - ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], - 1.0, (double *)ap1->data, lda, - (double *)ap2->data, ldb, - 0.0, (double *)ret->data, ldb); + +static PyObject * +dotblas_matrixproduct(PyObject *dummy, PyObject *args) +{ + PyObject *op1, *op2; + PyArrayObject *ap1, *ap2, *ret; + int i, j, l, lda, ldb, matchDim = -1, otherDim = -1; + int typenum; + int dimensions[MAX_DIMS], nd; + static const float oneF[2] = {1.0, 0.0}; + static const float zeroF[2] = {0.0, 0.0}; + static const double oneD[2] = {1.0, 0.0}; + static const double zeroD[2] = {0.0, 0.0}; + + + if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; + + /* + * "Matrix product" using the BLAS. + * Only works for float double and complex types. + */ + + + typenum = PyArray_ObjectType(op1, 0); + typenum = PyArray_ObjectType(op2, typenum); + + /* This function doesn't handle other types */ + if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE && + typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) { + return PyArray_InnerProduct(op1, op2); } - else if (typenum == PyArray_FLOAT) { - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, - ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], - 1.0, (float *)ap1->data, lda, - (float *)ap2->data, ldb, - 0.0, (float *)ret->data, ldb); + + ret = NULL; + ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); + if (ap1 == NULL) return NULL; + ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); + if (ap2 == NULL) goto fail; + + + if ((ap1->nd > 2) || (ap2->nd > 2)) { + /* This function doesn't handle dimensions greater than 2 -- other + than to ensure the dot function is altered + */ + if (!altered) { + /* need to alter dot product */ + PyObject *tmp1, *tmp2; + tmp1 = PyTuple_New(0); + tmp2 = dotblas_alterdot(NULL, tmp1); + Py_DECREF(tmp1); + Py_DECREF(tmp2); + } + ret = (PyArrayObject *)PyArray_MatrixProduct((PyObject *)ap1, + (PyObject *)ap2); + Py_DECREF(ap1); + Py_DECREF(ap2); + return (PyObject *)ret; } - else if (typenum == PyArray_CDOUBLE) { - cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, - ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], - oneD, (double *)ap1->data, lda, - (double *)ap2->data, ldb, - zeroD, (double *)ret->data, ldb); + + + if (typenum != PyArray_FLOAT && typenum != PyArray_DOUBLE && + typenum != PyArray_CFLOAT && typenum != PyArray_CDOUBLE) { + PyErr_SetString(PyExc_TypeError, "at least one argument must be (possibly complex) float or double"); + goto fail; } - else if (typenum == PyArray_CFLOAT) { - cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, - ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], - oneF, (float *)ap1->data, lda, - (float *)ap2->data, ldb, - zeroF, (float *)ret->data, ldb); - } - } - else { - /* Deal with arrays with dimension greater than two. All we do is copy - * the original multiarraymodule.c/PyArray_InnerProduct but use the BLAS - * one dimensional functions instead of the macro versions in - * multiarraymodule - */ - int is1, is2, is1r, is2r, n1, n2, os, i1, i2; - char *ip1, *ip2, *op; - DotFunction *dot = dotFunctions[(int)(ret->descr->type_num)]; - if (dot == NULL) { - PyErr_SetString(PyExc_ValueError, - "dotblas matrixMultiply not available for this type"); - goto fail; - } - - n1 = PyArray_SIZE(ap1)/l; - n2 = PyArray_SIZE(ap2)/l; - is1 = ap1->strides[ap1->nd-1]/ap1->descr->elsize; - is2 = ap2->strides[matchDim]/ap2->descr->elsize; - if(ap1->nd > 1) - is1r = ap1->strides[ap1->nd-2]; - else - is1r = ap1->strides[ap1->nd-1]; - is2r = ap2->strides[otherDim]; - op = ret->data; - os = ret->descr->elsize; - - ip1 = ap1->data; - for(i1=0; i1<n1; i1++) { - ip2 = ap2->data; - for(i2=0; i2<n2; i2++) { - dot(ip1, is1, ip2, is2, op, l); - ip2 += is2r; - op += os; - } - ip1 += is1r; - } - } - - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); + if (ap1->nd == 0 || ap2->nd == 0) { + /* One of ap1 or ap2 is a scalar */ + if (ap1->nd == 0) { /* Make ap2 the scalar */ + PyArrayObject *t = ap1; + ap1 = ap2; + ap2 = t; + } + for (l = 1, j = 0; j < ap1->nd; j++) { + dimensions[j] = ap1->dimensions[j]; + l *= dimensions[j]; + } + nd = ap1->nd; + } + else { /* (ap1->nd <= 2 && ap2->nd <= 2) */ + /* Both ap1 and ap2 are vectors or matrices */ + l = ap1->dimensions[ap1->nd-1]; + + if (ap2->dimensions[0] != l) { + PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); + goto fail; + } + nd = ap1->nd+ap2->nd-2; + + if (nd == 1) + dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[1]; + else if (nd == 2) { + dimensions[0] = ap1->dimensions[0]; + dimensions[1] = ap2->dimensions[1]; + } + } + + ret = (PyArrayObject *)PyArray_FromDims(nd, dimensions, typenum); + if (ret == NULL) goto fail; + + if (ap2->nd == 0) { + /* Multiplication by a scalar -- Level 1 BLAS */ + if (typenum == PyArray_DOUBLE) { + cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data, 1, + (double *)ret->data, 1); + } + else if (typenum == PyArray_FLOAT) { + cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data, 1, + (float *)ret->data, 1); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data, 1, + (double *)ret->data, 1); + } + else if (typenum == PyArray_CFLOAT) { + cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data, 1, + (float *)ret->data, 1); + } + } + else if (ap1->nd == 1 && ap2->nd == 1) { + /* Dot product between two vectors -- Level 1 BLAS */ + if (typenum == PyArray_DOUBLE) { + double result = cblas_ddot(l, (double *)ap1->data, 1, + (double *)ap2->data, 1); + *((double *)ret->data) = result; + } + else if (typenum == PyArray_FLOAT) { + float result = cblas_sdot(l, (float *)ap1->data, 1, + (float *)ap2->data, 1); + *((float *)ret->data) = result; + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zdotu_sub(l, (double *)ap1->data, 1, + (double *)ap2->data, 1, (double *)ret->data); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cdotu_sub(l, (float *)ap1->data, 1, + (float *)ap2->data, 1, (float *)ret->data); + } + } + else if (ap1->nd == 2 && ap2->nd == 1) { + /* Matrix vector multiplication -- Level 2 BLAS */ + /* lda must be MAX(M,1) */ + lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); + if (typenum == PyArray_DOUBLE) { + cblas_dgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + 1.0, (double *)ap1->data, lda, + (double *)ap2->data, 1, 0.0, (double *)ret->data, 1); + } + else if (typenum == PyArray_FLOAT) { + cblas_sgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + 1.0, (float *)ap1->data, lda, + (float *)ap2->data, 1, 0.0, (float *)ret->data, 1); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + oneD, (double *)ap1->data, lda, + (double *)ap2->data, 1, zeroD, (double *)ret->data, 1); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + oneF, (float *)ap1->data, lda, + (float *)ap2->data, 1, zeroF, (float *)ret->data, 1); + } + } + else if (ap1->nd == 1 && ap2->nd == 2) { + /* Vector matrix multiplication -- Level 2 BLAS */ + lda = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); + if (typenum == PyArray_DOUBLE) { + cblas_dgemv(CblasRowMajor, + CblasTrans, ap2->dimensions[0], ap2->dimensions[1], + 1.0, (double *)ap2->data, lda, + (double *)ap1->data, 1, 0.0, (double *)ret->data, 1); + } + else if (typenum == PyArray_FLOAT) { + cblas_sgemv(CblasRowMajor, + CblasTrans, ap2->dimensions[0], ap2->dimensions[1], + 1.0, (float *)ap2->data, lda, + (float *)ap1->data, 1, 0.0, (float *)ret->data, 1); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zgemv(CblasRowMajor, + CblasTrans, ap2->dimensions[0], ap2->dimensions[1], + oneD, (double *)ap2->data, lda, + (double *)ap1->data, 1, zeroD, (double *)ret->data, 1); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cgemv(CblasRowMajor, + CblasTrans, ap2->dimensions[0], ap2->dimensions[1], + oneF, (float *)ap2->data, lda, + (float *)ap1->data, 1, zeroF, (float *)ret->data, 1); + } + } + else { /* (ap1->nd == 2 && ap2->nd == 2) */ + /* Matrix matrix multiplication -- Level 3 BLAS */ + lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); + ldb = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); + if (typenum == PyArray_DOUBLE) { + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], + 1.0, (double *)ap1->data, lda, + (double *)ap2->data, ldb, + 0.0, (double *)ret->data, ldb); + } + else if (typenum == PyArray_FLOAT) { + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], + 1.0, (float *)ap1->data, lda, + (float *)ap2->data, ldb, + 0.0, (float *)ret->data, ldb); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], + oneD, (double *)ap1->data, lda, + (double *)ap2->data, ldb, + zeroD, (double *)ret->data, ldb); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + ap1->dimensions[0], ap2->dimensions[1], ap2->dimensions[0], + oneF, (float *)ap1->data, lda, + (float *)ap2->data, ldb, + zeroF, (float *)ret->data, ldb); + } + } + + Py_DECREF(ap1); + Py_DECREF(ap2); + return PyArray_Return(ret); fail: - Py_XDECREF(ap1); - Py_XDECREF(ap2); - Py_XDECREF(ret); - return NULL; + Py_XDECREF(ap1); + Py_XDECREF(ap2); + Py_XDECREF(ret); + return NULL; } static char doc_innerproduct[] = "innerproduct(a,b)\nReturns the inner product of a and b for arrays of floating point types.\nLike the generic Numeric equivalent the product sum is over\nthe last dimension of a and b.\nNB: The first argument is not conjugated."; -static PyObject *dotblas_innerproduct(PyObject *dummy, PyObject *args) { - PyObject *op1, *op2; - PyArrayObject *ap1, *ap2, *ret; - int i, j, l, lda, ldb; - int typenum; - int dimensions[MAX_DIMS], nd; - static const float oneF[2] = {1.0, 0.0}; - static const float zeroF[2] = {0.0, 0.0}; - static const double oneD[2] = {1.0, 0.0}; - static const double zeroD[2] = {0.0, 0.0}; +static PyObject * +dotblas_innerproduct(PyObject *dummy, PyObject *args) +{ + PyObject *op1, *op2; + PyArrayObject *ap1, *ap2, *ret; + int i, j, l, lda, ldb; + int typenum; + int dimensions[MAX_DIMS], nd; + PyArray_DotFunc *dotfunc; + static const float oneF[2] = {1.0, 0.0}; + static const float zeroF[2] = {0.0, 0.0}; + static const double oneD[2] = {1.0, 0.0}; + static const double zeroD[2] = {0.0, 0.0}; + + + if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; + + /* + * Inner product using the BLAS. The product sum is taken along the last + * dimensions of the two arrays. + * Only speeds things up for float double and complex types. + */ - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; + typenum = PyArray_ObjectType(op1, 0); + typenum = PyArray_ObjectType(op2, typenum); + + /* This function doesn't handle other types */ + if ((typenum != PyArray_DOUBLE && typenum != PyArray_CDOUBLE && + typenum != PyArray_FLOAT && typenum != PyArray_CFLOAT)) { + return PyArray_InnerProduct(op1, op2); + } + + ret = NULL; + ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); + if (ap1 == NULL) return NULL; + ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); + if (ap2 == NULL) goto fail; + + + if ((ap1->nd > 2) || (ap2->nd > 2)) { + /* This function doesn't handle dimensions greater than 2 -- other + than to ensure the dot function is altered + */ + if (!altered) { + /* need to alter dot product */ + PyObject *tmp1, *tmp2; + tmp1 = PyTuple_New(0); + tmp2 = dotblas_alterdot(NULL, tmp1); + Py_DECREF(tmp1); + Py_DECREF(tmp2); + } + ret = (PyArrayObject *)PyArray_InnerProduct((PyObject *)ap1, + (PyObject *)ap2); + Py_DECREF(ap1); + Py_DECREF(ap2); + return (PyObject *)ret; + } + + if (ap1->nd == 0 || ap2->nd == 0) { + /* One of ap1 or ap2 is a scalar */ + if (ap1->nd == 0) { /* Make ap2 the scalar */ + PyArrayObject *t = ap1; + ap1 = ap2; + ap2 = t; + } + for (l = 1, j = 0; j < ap1->nd; j++) { + dimensions[j] = ap1->dimensions[j]; + l *= dimensions[j]; + } + nd = ap1->nd; + } + else { /* (ap1->nd <= 2 && ap2->nd <= 2) */ + /* Both ap1 and ap2 are vectors or matrices */ + l = ap1->dimensions[ap1->nd-1]; - /* - * Inner product using the BLAS. The product sum is taken along the last - * dimensions of the two arrays. - * Only works for float double and complex types. - */ - + if (ap2->dimensions[ap2->nd-1] != l) { + PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); + goto fail; + } + nd = ap1->nd+ap2->nd-2; + + if (nd == 1) + dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[0]; + else if (nd == 2) { + dimensions[0] = ap1->dimensions[0]; + dimensions[1] = ap2->dimensions[0]; + } + } + + ret = (PyArrayObject *)PyArray_FromDims(nd, dimensions, typenum); + if (ret == NULL) goto fail; + + if (ap2->nd == 0) { + /* Multiplication by a scalar -- Level 1 BLAS */ + if (typenum == PyArray_DOUBLE) { + cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data, 1, + (double *)ret->data, 1); + } + else if (typenum == PyArray_FLOAT) { + cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data, 1, + (float *)ret->data, 1); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data, 1, + (double *)ret->data, 1); + } + else if (typenum == PyArray_CFLOAT) { + cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data, 1, + (float *)ret->data, 1); + } + } + else if (ap1->nd == 1 && ap2->nd == 1) { + /* Dot product between two vectors -- Level 1 BLAS */ + if (typenum == PyArray_DOUBLE) { + double result = cblas_ddot(l, (double *)ap1->data, 1, + (double *)ap2->data, 1); + *((double *)ret->data) = result; + } + else if (typenum == PyArray_FLOAT) { + float result = cblas_sdot(l, (float *)ap1->data, 1, + (float *)ap2->data, 1); + *((float *)ret->data) = result; + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zdotu_sub(l, (double *)ap1->data, 1, + (double *)ap2->data, 1, (double *)ret->data); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cdotu_sub(l, (float *)ap1->data, 1, + (float *)ap2->data, 1, (float *)ret->data); + } + } + else if (ap1->nd == 2 && ap2->nd == 1) { + /* Matrix-vector multiplication -- Level 2 BLAS */ + lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); + if (typenum == PyArray_DOUBLE) { + cblas_dgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + 1.0, (double *)ap1->data, lda, + (double *)ap2->data, 1, 0.0, (double *)ret->data, 1); + } + else if (typenum == PyArray_FLOAT) { + cblas_sgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + 1.0, (float *)ap1->data, lda, + (float *)ap2->data, 1, 0.0, (float *)ret->data, 1); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + oneD, (double *)ap1->data, lda, + (double *)ap2->data, 1, zeroD, (double *)ret->data, 1); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cgemv(CblasRowMajor, + CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], + oneF, (float *)ap1->data, lda, + (float *)ap2->data, 1, zeroF, (float *)ret->data, 1); + } + } + else if (ap1->nd == 1 && ap2->nd == 2) { + /* Vector matrix multiplication -- Level 2 BLAS */ + lda = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); + if (typenum == PyArray_DOUBLE) { + cblas_dgemv(CblasRowMajor, + CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], + 1.0, (double *)ap2->data, lda, + (double *)ap1->data, 1, 0.0, (double *)ret->data, 1); + } + else if (typenum == PyArray_FLOAT) { + cblas_sgemv(CblasRowMajor, + CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], + 1.0, (float *)ap2->data, lda, + (float *)ap1->data, 1, 0.0, (float *)ret->data, 1); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zgemv(CblasRowMajor, + CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], + oneD, (double *)ap2->data, lda, + (double *)ap1->data, 1, zeroD, (double *)ret->data, 1); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cgemv(CblasRowMajor, + CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], + oneF, (float *)ap2->data, lda, + (float *)ap1->data, 1, zeroF, (float *)ret->data, 1); + } + } + else { /* (ap1->nd == 2 && ap2->nd == 2) */ + /* Matrix matrix multiplication -- Level 3 BLAS */ + lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); + ldb = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); + if (typenum == PyArray_DOUBLE) { + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], + 1.0, (double *)ap1->data, lda, + (double *)ap2->data, ldb, + 0.0, (double *)ret->data, ldb); + } + else if (typenum == PyArray_FLOAT) { + cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], + 1.0, (float *)ap1->data, lda, + (float *)ap2->data, ldb, + 0.0, (float *)ret->data, ldb); + } + else if (typenum == PyArray_CDOUBLE) { + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], + oneD, (double *)ap1->data, lda, + (double *)ap2->data, ldb, + zeroD, (double *)ret->data, ldb); + } + else if (typenum == PyArray_CFLOAT) { + cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasTrans, + ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], + oneF, (float *)ap1->data, lda, + (float *)ap2->data, ldb, + zeroF, (float *)ret->data, ldb); + } + } + Py_DECREF(ap1); + Py_DECREF(ap2); + return PyArray_Return(ret); + + fail: + Py_XDECREF(ap1); + Py_XDECREF(ap2); + Py_XDECREF(ret); + return NULL; +} - typenum = PyArray_ObjectType(op1, 0); - typenum = PyArray_ObjectType(op2, typenum); - ret = NULL; - ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); - if (ap1 == NULL) return NULL; - ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); - if (ap2 == NULL) goto fail; +static char doc_vdot[] = "vdot(a,b)\nReturns the dot product of a and b for scalars and vectors\nof floating point and complex types. The first argument, a, is conjugated."; - if (typenum != PyArray_FLOAT && typenum != PyArray_DOUBLE && - typenum != PyArray_CFLOAT && typenum != PyArray_CDOUBLE) { - PyErr_SetString(PyExc_TypeError, "at least one argument must be (possibly complex) float or double"); - goto fail; - } - - if (ap1->nd < 0 || ap2->nd < 0) { - PyErr_SetString(PyExc_TypeError, "negative dimensioned arrays!"); - goto fail; - } - - if (ap1->nd == 0 || ap2->nd == 0) { - /* One of ap1 or ap2 is a scalar */ - if (ap1->nd == 0) { /* Make ap2 the scalar */ - PyArrayObject *t = ap1; - ap1 = ap2; - ap2 = t; - } - for (l = 1, j = 0; j < ap1->nd; j++) { - dimensions[j] = ap1->dimensions[j]; - l *= dimensions[j]; - } - nd = ap1->nd; - } - else if (ap1->nd <= 2 && ap2->nd <= 2) { - /* Both ap1 and ap2 are vectors or matrices */ - l = ap1->dimensions[ap1->nd-1]; +static PyObject *dotblas_vdot(PyObject *dummy, PyObject *args) { + PyObject *op1, *op2; + PyArrayObject *ap1=NULL, *ap2=NULL, *ret=NULL; + int l; + int typenum; + int dimensions[MAX_DIMS]; + PyArray_Typecode type; + + if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; - if (ap2->dimensions[ap2->nd-1] != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); - goto fail; - } - nd = ap1->nd+ap2->nd-2; - - if (nd == 1) - dimensions[0] = (ap1->nd == 2) ? ap1->dimensions[0] : ap2->dimensions[0]; - else if (nd == 2) { - dimensions[0] = ap1->dimensions[0]; - dimensions[1] = ap2->dimensions[0]; - } - } - else { - /* At least one of ap1 or ap2 has dimension > 2. */ - l = ap1->dimensions[ap1->nd-1]; + /* + * Conjugating dot product using the BLAS for vectors. + * Multiplies op1 and op2, each of which must be vector. + */ + + typenum = PyArray_ObjectType(op1, 0); + typenum = PyArray_ObjectType(op2, typenum); - if (ap2->dimensions[ap2->nd-1] != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); - goto fail; + type.type_num = typenum; + + ap1 = PyArray_FromAny(op1, &type, 0, 0, NULL); + if (ap1==NULL) goto fail; + op1 = PyArray_Flatten(ap1); + if (op1==NULL) goto fail; + Py_DECREF(ap1); + ap1 = (PyArrayObject *)op1; + + ap2 = PyArray_FromAny(op2, &type, 0, 0, NULL); + if (ap2==NULL) goto fail; + op2 = PyArray_Flatten(ap2); + if (op2 == NULL) goto fail; + Py_DECREF(ap2); + ap2 = (PyArrayObject *)op2; + + if (typenum != PyArray_FLOAT && typenum != PyArray_DOUBLE && + typenum != PyArray_CFLOAT && typenum != PyArray_CDOUBLE) { + PyErr_SetString(PyExc_TypeError, "at least one argument must be (possibly complex) float or double"); + goto fail; } - nd = ap1->nd+ap2->nd-2; - j = 0; - for(i=0; i<ap1->nd-1; i++) { - dimensions[j++] = ap1->dimensions[i]; + if (ap1->nd != 1 || ap2->nd != 1) { + PyErr_SetString(PyExc_TypeError, "arguments must be vectors"); + goto fail; } - for(i=0; i<ap2->nd-1; i++) { - dimensions[j++] = ap2->dimensions[i]; + + if (ap2->dimensions[0] != ap1->dimensions[ap1->nd-1]) { + PyErr_SetString(PyExc_ValueError, "vectors have different lengths"); + goto fail; } - } + l = ap1->dimensions[ap1->nd-1]; + + ret = (PyArrayObject *)PyArray_FromDims(0, dimensions, typenum); + if (ret == NULL) goto fail; - ret = (PyArrayObject *)PyArray_FromDims(nd, dimensions, typenum); - if (ret == NULL) goto fail; - if (ap2->nd == 0) { - /* Multiplication by a scalar -- Level 1 BLAS */ - if (typenum == PyArray_DOUBLE) { - cblas_daxpy(l, *((double *)ap2->data), (double *)ap1->data, 1, - (double *)ret->data, 1); - } - else if (typenum == PyArray_FLOAT) { - cblas_saxpy(l, *((float *)ap2->data), (float *)ap1->data, 1, - (float *)ret->data, 1); - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zaxpy(l, (double *)ap2->data, (double *)ap1->data, 1, - (double *)ret->data, 1); - } - else if (typenum == PyArray_CFLOAT) { - cblas_caxpy(l, (float *)ap2->data, (float *)ap1->data, 1, - (float *)ret->data, 1); - } - } - else if (ap1->nd == 1 && ap2->nd == 1) { /* Dot product between two vectors -- Level 1 BLAS */ if (typenum == PyArray_DOUBLE) { - double result = cblas_ddot(l, (double *)ap1->data, 1, - (double *)ap2->data, 1); - *((double *)ret->data) = result; - } - else if (typenum == PyArray_FLOAT) { - float result = cblas_sdot(l, (float *)ap1->data, 1, - (float *)ap2->data, 1); - *((float *)ret->data) = result; - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zdotu_sub(l, (double *)ap1->data, 1, - (double *)ap2->data, 1, (double *)ret->data); - } - else if (typenum == PyArray_CFLOAT) { - cblas_cdotu_sub(l, (float *)ap1->data, 1, - (float *)ap2->data, 1, (float *)ret->data); - } - } - else if (ap1->nd == 2 && ap2->nd == 1) { - /* Matrix-vector multiplication -- Level 2 BLAS */ - lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); - if (typenum == PyArray_DOUBLE) { - cblas_dgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - 1.0, (double *)ap1->data, lda, - (double *)ap2->data, 1, 0.0, (double *)ret->data, 1); - } - else if (typenum == PyArray_FLOAT) { - cblas_sgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - 1.0, (float *)ap1->data, lda, - (float *)ap2->data, 1, 0.0, (float *)ret->data, 1); - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - oneD, (double *)ap1->data, lda, - (double *)ap2->data, 1, zeroD, (double *)ret->data, 1); - } - else if (typenum == PyArray_CFLOAT) { - cblas_cgemv(CblasRowMajor, - CblasNoTrans, ap1->dimensions[0], ap1->dimensions[1], - oneF, (float *)ap1->data, lda, - (float *)ap2->data, 1, zeroF, (float *)ret->data, 1); - } - } - else if (ap1->nd == 1 && ap2->nd == 2) { - /* Vector matrix multiplication -- Level 2 BLAS */ - lda = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); - if (typenum == PyArray_DOUBLE) { - cblas_dgemv(CblasRowMajor, - CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], - 1.0, (double *)ap2->data, lda, - (double *)ap1->data, 1, 0.0, (double *)ret->data, 1); + *((double *)ret->data) = cblas_ddot(l, (double *)ap1->data, 1, + (double *)ap2->data, 1); } else if (typenum == PyArray_FLOAT) { - cblas_sgemv(CblasRowMajor, - CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], - 1.0, (float *)ap2->data, lda, - (float *)ap1->data, 1, 0.0, (float *)ret->data, 1); + *((float *)ret->data) = cblas_sdot(l, (float *)ap1->data, 1, + (float *)ap2->data, 1); } else if (typenum == PyArray_CDOUBLE) { - cblas_zgemv(CblasRowMajor, - CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], - oneD, (double *)ap2->data, lda, - (double *)ap1->data, 1, zeroD, (double *)ret->data, 1); + cblas_zdotc_sub(l, (double *)ap1->data, 1, + (double *)ap2->data, 1, (double *)ret->data); } else if (typenum == PyArray_CFLOAT) { - cblas_cgemv(CblasRowMajor, - CblasNoTrans, ap2->dimensions[0], ap2->dimensions[1], - oneF, (float *)ap2->data, lda, - (float *)ap1->data, 1, zeroF, (float *)ret->data, 1); - } - } - else if (ap1->nd == 2 && ap2->nd == 2) { - /* Matrix matrix multiplication -- Level 3 BLAS */ - lda = (ap1->dimensions[1] > 1 ? ap1->dimensions[1] : 1); - ldb = (ap2->dimensions[1] > 1 ? ap2->dimensions[1] : 1); - if (typenum == PyArray_DOUBLE) { - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, - ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], - 1.0, (double *)ap1->data, lda, - (double *)ap2->data, ldb, - 0.0, (double *)ret->data, ldb); - } - else if (typenum == PyArray_FLOAT) { - cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, - ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], - 1.0, (float *)ap1->data, lda, - (float *)ap2->data, ldb, - 0.0, (float *)ret->data, ldb); - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasTrans, - ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], - oneD, (double *)ap1->data, lda, - (double *)ap2->data, ldb, - zeroD, (double *)ret->data, ldb); + cblas_cdotc_sub(l, (float *)ap1->data, 1, + (float *)ap2->data, 1, (float *)ret->data); } - else if (typenum == PyArray_CFLOAT) { - cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasTrans, - ap1->dimensions[0], ap2->dimensions[0], ap1->dimensions[1], - oneF, (float *)ap1->data, lda, - (float *)ap2->data, ldb, - zeroF, (float *)ret->data, ldb); - } - } - else { - /* Deal with arrays with dimension greater than two. All we do is copy - * the original multiarraymodule.c/PyArray_InnerProduct but use the BLAS - * one dimensional functions instead of the macro versions in - * multiarraymodule - */ - int is1, is2, n1, n2, os, i1, i2, s1, s2; - char *ip1, *ip2, *op; - DotFunction *dot = dotFunctions[(int)(ret->descr->type_num)]; - if (dot == NULL) { - PyErr_SetString(PyExc_ValueError, - "dotblas inner product not available for this type"); - goto fail; - } - - n1 = PyArray_SIZE(ap1)/l; - n2 = PyArray_SIZE(ap2)/l; - is1 = ap1->strides[ap1->nd-1]; - s1 = is1/ap1->descr->elsize; - is2 = ap2->strides[ap2->nd-1]; - s2 = is2/ap2->descr->elsize; - op = ret->data; - os = ret->descr->elsize; - - ip1 = ap1->data; - for(i1=0; i1<n1; i1++) { - ip2 = ap2->data; - for(i2=0; i2<n2; i2++) { - dot(ip1, s1, ip2, s2, op, l); - ip2 += is2*l; - op += os; - } - ip1 += is1*l; - } - } - - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); + Py_DECREF(ap1); + Py_DECREF(ap2); + return PyArray_Return(ret); fail: - Py_XDECREF(ap1); - Py_XDECREF(ap2); - Py_XDECREF(ret); - return NULL; + Py_XDECREF(ap1); + Py_XDECREF(ap2); + Py_XDECREF(ret); + return NULL; } -static char doc_vdot[] = "vdot(a,b)\nReturns the dot product of a and b for scalars and vectors\nof floating point and complex types. The first argument, a, is conjugated."; +static char doc_alterdot[] = "alterdot() changes all dot functions to use blas."; + +static PyObject * +dotblas_alterdot(PyObject *dummy, PyObject *args) +{ + PyArray_Descr *descr; + + if (!PyArg_ParseTuple(args, "")) return NULL; + /* Replace the dot functions to the ones using blas */ + descr = PyArray_DescrFromType(PyArray_FLOAT); + oldFunctions[PyArray_FLOAT] = descr->dotfunc; + descr->dotfunc = FLOAT_dot; -static PyObject *dotblas_vdot(PyObject *dummy, PyObject *args) { - PyObject *op1, *op2; - PyArrayObject *ap1, *ap2, *ret; - int l; - int typenum; - int dimensions[MAX_DIMS]; + descr = PyArray_DescrFromType(PyArray_DOUBLE); + oldFunctions[PyArray_DOUBLE] = descr->dotfunc; + descr->dotfunc = DOUBLE_dot; - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) return NULL; - - /* - * Conjugating dot product using the BLAS for vectors. - * Multiplies op1 and op2, each of which must be vector. - */ - - typenum = PyArray_ObjectType(op1, 0); - typenum = PyArray_ObjectType(op2, typenum); - - - ret = NULL; - ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); - if (ap1 == NULL) return NULL; - ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); - if (ap2 == NULL) goto fail; - - if (typenum != PyArray_FLOAT && typenum != PyArray_DOUBLE && - typenum != PyArray_CFLOAT && typenum != PyArray_CDOUBLE) { - PyErr_SetString(PyExc_TypeError, "at least one argument must be (possibly complex) float or double"); - goto fail; - } - - if (ap1->nd != 1 || ap2->nd != 1) { - PyErr_SetString(PyExc_TypeError, "arguments must be vectors"); - goto fail; - } - - if (ap2->dimensions[0] != ap1->dimensions[ap1->nd-1]) { - PyErr_SetString(PyExc_ValueError, "vectors have different lengths"); - goto fail; - } - l = ap1->dimensions[ap1->nd-1]; - - ret = (PyArrayObject *)PyArray_FromDims(0, dimensions, typenum); - if (ret == NULL) goto fail; - - - /* Dot product between two vectors -- Level 1 BLAS */ - if (typenum == PyArray_DOUBLE) { - *((double *)ret->data) = cblas_ddot(l, (double *)ap1->data, 1, - (double *)ap2->data, 1); - } - else if (typenum == PyArray_FLOAT) { - *((float *)ret->data) = cblas_sdot(l, (float *)ap1->data, 1, - (float *)ap2->data, 1); - } - else if (typenum == PyArray_CDOUBLE) { - cblas_zdotc_sub(l, (double *)ap1->data, 1, - (double *)ap2->data, 1, (double *)ret->data); - } - else if (typenum == PyArray_CFLOAT) { - cblas_cdotc_sub(l, (float *)ap1->data, 1, - (float *)ap2->data, 1, (float *)ret->data); - } - - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - - fail: - Py_XDECREF(ap1); - Py_XDECREF(ap2); - Py_XDECREF(ret); - return NULL; + descr = PyArray_DescrFromType(PyArray_CFLOAT); + oldFunctions[PyArray_CFLOAT] = descr->dotfunc; + descr->dotfunc = CFLOAT_dot; + + descr = PyArray_DescrFromType(PyArray_CDOUBLE); + oldFunctions[PyArray_CDOUBLE] = descr->dotfunc; + descr->dotfunc = CDOUBLE_dot; + + Py_INCREF(Py_None); + return Py_None; } +static char doc_restoredot[] = "restoredot() restores dots to defaults."; + +static PyObject * +dotblas_restoredot(PyObject *dummy, PyObject *args) +{ + + if (!PyArg_ParseTuple(args, "")) return NULL; + + if (oldFunctions[PyArray_FLOAT]) { + descr->dotfunc = oldFunctions[PyArray_FLOAT]; + oldFunctions[PyArray_FLOAT] = NULL; + } + + if (oldFunctions[PyArray_DOUBLE]) { + descr->dotfunc = oldFunctions[PyArray_DOUBLE]; + oldFunctions[PyArray_DOUBLE] = NULL; + } + + if (oldFunctions[PyArray_CFLOAT]) { + descr->dotfunc = oldFunctions[PyArray_CFLOAT]; + oldFunctions[PyArray_CFLOAT] = NULL; + } + + + if (oldFunctions[PyArray_CDOUBLE]) { + descr->dotfunc = oldFunctions[PyArray_CDOUBLE]; + oldFunctions[PyArray_CDOUBLE] = NULL; + } + + Py_INCREF(Py_None); + return Py_None; +} + static struct PyMethodDef dotblas_module_methods[] = { - {"matrixproduct", (PyCFunction)dotblas_matrixproduct, 1, doc_matrixproduct}, - {"innerproduct", (PyCFunction)dotblas_innerproduct, 1, doc_innerproduct}, - {"vdot", (PyCFunction)dotblas_vdot, 1, doc_vdot}, + {"dot", (PyCFunction)dotblas_matrixproduct, 1, doc_matrixproduct}, + {"inner", (PyCFunction)dotblas_innerproduct, 1, doc_innerproduct}, + {"vdot", (PyCFunction)dotblas_vdot, 1, doc_vdot}, + {"alterdot", (PyCFunction)dotblas_alterdot, 1, doc_alterdot}, + {"restoredot", (PyCFunction)dotblas_restoredot, 1, doc_restoredot}, {NULL, NULL, 0} /* sentinel */ }; /* Initialization function for the module */ DL_EXPORT(void) init_dotblas(void) { - int i; + int i; PyObject *m, *d, *s; - + /* Create the module and add the functions */ m = Py_InitModule3("_dotblas", dotblas_module_methods, module_doc); /* Import the array object */ import_array(); - /* Add some symbolic constants to the module */ - d = PyModule_GetDict(m); - - s = PyString_FromString("$Id: _dotblas.c,v 1.3 2005/04/06 22:40:23 dmcooke Exp $"); - PyDict_SetItemString(d, "__version__", s); - Py_DECREF(s); - /* Initialise the array of dot functions */ for (i = 0; i < PyArray_NTYPES; i++) - dotFunctions[i] = NULL; - dotFunctions[PyArray_FLOAT] = FLOAT_dot; - dotFunctions[PyArray_DOUBLE] = DOUBLE_dot; - dotFunctions[PyArray_CFLOAT] = CFLOAT_dot; - dotFunctions[PyArray_CDOUBLE] = CDOUBLE_dot; + oldFunctions[i] = NULL; + /* alterdot at load */ + d = PyTuple_New(0); + s = dotblas_alterdot(NULL, d); + Py_DECREF(d); + Py_DECREF(s); /* Check for errors */ if (PyErr_Occurred()) |