Diffstat (limited to 'numpy/core')
-rw-r--r--  numpy/core/_add_newdocs.py                      | 123
-rw-r--r--  numpy/core/code_generators/generate_umath.py    |  21
-rw-r--r--  numpy/core/code_generators/ufunc_docstrings.py  | 125
-rw-r--r--  numpy/core/multiarray.py                        |   1
-rw-r--r--  numpy/core/setup.py                             |   3
-rw-r--r--  numpy/core/src/common/cblasfuncs.c              |   2
-rw-r--r--  numpy/core/src/common/cblasfuncs.h              |   2
-rw-r--r--  numpy/core/src/multiarray/multiarraymodule.c    | 155
-rw-r--r--  numpy/core/src/multiarray/number.c              |  10
-rw-r--r--  numpy/core/src/multiarray/number.h              |   1
-rw-r--r--  numpy/core/src/multiarray/scalartypes.c.src     |  16
-rw-r--r--  numpy/core/src/umath/matmul.c.src               | 403
-rw-r--r--  numpy/core/src/umath/matmul.h.src               |  12
-rw-r--r--  numpy/core/src/umath/scalarmath.c.src           |  17
-rw-r--r--  numpy/core/src/umath/ufunc_object.c             |  12
-rw-r--r--  numpy/core/src/umath/ufunc_type_resolution.c    |  87
-rw-r--r--  numpy/core/src/umath/ufunc_type_resolution.h    |   6
-rw-r--r--  numpy/core/tests/test_multiarray.py             | 177
18 files changed, 812 insertions, 361 deletions
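
The substance of this change: `matmul` stops being a hand-written C function in multiarraymodule.c (dispatching to cblas_matrixproduct or einsum) and becomes a generalized ufunc with core signature `(n?,k),(k,m?)->(n?,m?)`, backed by dedicated inner loops in the new matmul.c.src. The `?` marks an optional core dimension, which is how the documented 1-D promotion rules fall out of the gufunc machinery. A minimal sketch of the shape semantics the signature encodes (ordinary NumPy usage, nothing specific to this branch assumed):

    >>> import numpy as np
    >>> np.matmul(np.ones((2, 3)), np.ones((3, 4))).shape  # (n,k),(k,m)->(n,m)
    (2, 4)
    >>> np.matmul(np.ones(3), np.ones((3, 4))).shape       # n? omitted: vector @ matrix
    (4,)
    >>> np.matmul(np.ones((2, 3)), np.ones(3)).shape       # m? omitted: matrix @ vector
    (2,)
    >>> np.matmul(np.ones(3), np.ones(3)).shape            # both omitted: scalar result
    ()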
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index bc034c3e9..668aee935 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -1513,129 +1513,6 @@ add_newdoc('numpy.core.multiarray', 'getbuffer', """) -add_newdoc('numpy.core', 'matmul', - """ - matmul(a, b, out=None) - - Matrix product of two arrays. - - The behavior depends on the arguments in the following way. - - - If both arguments are 2-D they are multiplied like conventional - matrices. - - If either argument is N-D, N > 2, it is treated as a stack of - matrices residing in the last two indexes and broadcast accordingly. - - If the first argument is 1-D, it is promoted to a matrix by - prepending a 1 to its dimensions. After matrix multiplication - the prepended 1 is removed. - - If the second argument is 1-D, it is promoted to a matrix by - appending a 1 to its dimensions. After matrix multiplication - the appended 1 is removed. - - Multiplication by a scalar is not allowed, use ``*`` instead. Note that - multiplying a stack of matrices with a vector will result in a stack of - vectors, but matmul will not recognize it as such. - - ``matmul`` differs from ``dot`` in two important ways. - - - Multiplication by scalars is not allowed. - - Stacks of matrices are broadcast together as if the matrices - were elements. - - .. warning:: - This function is preliminary and included in NumPy 1.10.0 for testing - and documentation. Its semantics will not change, but the number and - order of the optional arguments will. - - .. versionadded:: 1.10.0 - - Parameters - ---------- - a : array_like - First argument. - b : array_like - Second argument. - out : ndarray, optional - Output argument. This must have the exact kind that would be returned - if it was not used. In particular, it must have the right type, must be - C-contiguous, and its dtype must be the dtype that would be returned - for `dot(a,b)`. This is a performance feature. Therefore, if these - conditions are not met, an exception is raised, instead of attempting - to be flexible. - - Returns - ------- - output : ndarray - Returns the dot product of `a` and `b`. If `a` and `b` are both - 1-D arrays then a scalar is returned; otherwise an array is - returned. If `out` is given, then it is returned. - - Raises - ------ - ValueError - If the last dimension of `a` is not the same size as - the second-to-last dimension of `b`. - - If scalar value is passed. - - See Also - -------- - vdot : Complex-conjugating dot product. - tensordot : Sum products over arbitrary axes. - einsum : Einstein summation convention. - dot : alternative matrix product with different broadcasting rules. - - Notes - ----- - The matmul function implements the semantics of the `@` operator introduced - in Python 3.5 following PEP465. - - Examples - -------- - For 2-D arrays it is the matrix product: - - >>> a = [[1, 0], [0, 1]] - >>> b = [[4, 1], [2, 2]] - >>> np.matmul(a, b) - array([[4, 1], - [2, 2]]) - - For 2-D mixed with 1-D, the result is the usual. 
- - >>> a = [[1, 0], [0, 1]] - >>> b = [1, 2] - >>> np.matmul(a, b) - array([1, 2]) - >>> np.matmul(b, a) - array([1, 2]) - - - Broadcasting is conventional for stacks of arrays - - >>> a = np.arange(2*2*4).reshape((2,2,4)) - >>> b = np.arange(2*2*4).reshape((2,4,2)) - >>> np.matmul(a,b).shape - (2, 2, 2) - >>> np.matmul(a,b)[0,1,1] - 98 - >>> sum(a[0,1,:] * b[0,:,1]) - 98 - - Vector, vector returns the scalar inner product, but neither argument - is complex-conjugated: - - >>> np.matmul([2j, 3j], [2j, 3j]) - (-13+0j) - - Scalar multiplication raises an error. - - >>> np.matmul([1,2], 3) - Traceback (most recent call last): - ... - ValueError: Scalar operands are not allowed, use '*' instead - - """) - add_newdoc('numpy.core.multiarray', 'c_einsum', """ c_einsum(subscripts, *operands, out=None, dtype=None, order='K', diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 9d4e72c0e..a092cb95e 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -127,7 +127,7 @@ class Ufunc(object): type_descriptions : list of TypeDescription objects """ def __init__(self, nin, nout, identity, docstring, typereso, - *type_descriptions): + *type_descriptions, **kwargs): self.nin = nin self.nout = nout if identity is None: @@ -136,10 +136,13 @@ class Ufunc(object): self.docstring = docstring self.typereso = typereso self.type_descriptions = [] + self.signature = kwargs.pop('signature', 'NULL') for td in type_descriptions: self.type_descriptions.extend(td) for td in self.type_descriptions: td.finish_signature(self.nin, self.nout) + if kwargs: + raise ValueError('unknown kwargs %r' % str(kwargs)) # String-handling utilities to avoid locale-dependence. @@ -904,7 +907,14 @@ defdict = { "PyUFunc_SimpleBinaryOperationTypeResolver", TD(ints), TD('O', f='npy_ObjectLCM'), - ) + ), +'matmul' : + Ufunc(2, 1, None, + docstrings.get('numpy.core.umath.matmul'), + "PyUFunc_MatmulTypeResolver", + TD(notimes_or_obj), + signature='"(n?,k),(k,m?)->(n?,m?)"', + ), } if sys.version_info[0] >= 3: @@ -1058,7 +1068,7 @@ def make_ufuncs(funcdict): f = PyUFunc_FromFuncAndDataAndSignatureAndIdentity( {name}_functions, {name}_data, {name}_signatures, {nloops}, {nin}, {nout}, {identity}, "{name}", - "{doc}", 0, NULL, identity + "{doc}", 0, {signature}, identity ); if ({has_identity}) {{ Py_DECREF(identity); @@ -1073,7 +1083,8 @@ def make_ufuncs(funcdict): has_identity='0' if uf.identity is None_ else '1', identity='PyUFunc_IdentityValue', identity_expr=uf.identity, - doc=docstring + doc=docstring, + signature=uf.signature, ) # Only PyUFunc_None means don't reorder - we pass this using the old @@ -1106,6 +1117,8 @@ def make_code(funcdict, filename): #include "cpuid.h" #include "ufunc_object.h" #include "ufunc_type_resolution.h" + #include "loops.h" + #include "matmul.h" %s static int diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py index 13231de29..d79702a18 100644 --- a/numpy/core/code_generators/ufunc_docstrings.py +++ b/numpy/core/code_generators/ufunc_docstrings.py @@ -39,7 +39,8 @@ subst = { def add_newdoc(place, name, doc): doc = textwrap.dedent(doc).strip() - if name[0] != '_': + if name[0] != '_' and name != 'matmul': + # matmul is special, it does not use the OUT_SCALAR replacement strings if '\nx :' in doc: assert '$OUT_SCALAR_1' in doc, "in {}".format(name) elif '\nx2 :' in doc or '\nx1, x2 :' in doc: @@ -2541,6 +2542,128 @@ add_newdoc('numpy.core.umath', 
 'fmin',
 """)
 
+add_newdoc('numpy.core.umath', 'matmul',
+    """
+    Matrix product of two arrays.
+
+    Parameters
+    ----------
+    x1, x2 : array_like
+        Input arrays, scalars not allowed.
+    out : ndarray, optional
+        A location into which the result is stored. If provided, it must have
+        a shape that matches the signature `(n,k),(k,m)->(n,m)`. If not
+        provided or `None`, a freshly-allocated array is returned.
+    **kwargs
+        For other keyword-only arguments, see the
+        :ref:`ufunc docs <ufuncs.kwargs>`.
+
+        .. versionchanged:: 1.16
+           Now handles ufunc kwargs
+
+    Returns
+    -------
+    y : ndarray
+        The matrix product of the inputs.
+        This is a scalar only when both x1, x2 are 1-d vectors.
+
+    Raises
+    ------
+    ValueError
+        If the last dimension of `x1` is not the same size as
+        the second-to-last dimension of `x2`.
+
+        If a scalar value is passed in.
+
+    See Also
+    --------
+    vdot : Complex-conjugating dot product.
+    tensordot : Sum products over arbitrary axes.
+    einsum : Einstein summation convention.
+    dot : Alternative matrix product with different broadcasting rules.
+
+    Notes
+    -----
+    The behavior depends on the arguments in the following way.
+
+    - If both arguments are 2-D they are multiplied like conventional
+      matrices.
+    - If either argument is N-D, N > 2, it is treated as a stack of
+      matrices residing in the last two indexes and broadcast accordingly.
+    - If the first argument is 1-D, it is promoted to a matrix by
+      prepending a 1 to its dimensions. After matrix multiplication
+      the prepended 1 is removed.
+    - If the second argument is 1-D, it is promoted to a matrix by
+      appending a 1 to its dimensions. After matrix multiplication
+      the appended 1 is removed.
+
+    ``matmul`` differs from ``dot`` in two important ways:
+
+    - Multiplication by scalars is not allowed, use ``*`` instead.
+    - Stacks of matrices are broadcast together as if the matrices
+      were elements, respecting the signature `(n,k),(k,m)->(n,m)`:
+
+      >>> a = np.full([9, 5, 7, 4], True, dtype=bool)
+      >>> c = np.full([9, 5, 4, 3], True, dtype=bool)
+      >>> np.dot(a, c).shape
+      (9, 5, 7, 9, 5, 3)
+      >>> np.matmul(a, c).shape  # n is 7, k is 4, m is 3
+      (9, 5, 7, 3)
+
+    The matmul function implements the semantics of the `@` operator
+    introduced in Python 3.5 following PEP 465.
+
+    Examples
+    --------
+    For 2-D arrays it is the matrix product:
+
+    >>> a = np.array([[1, 0],
+    ...               [0, 1]])
+    >>> b = np.array([[4, 1],
+    ...               [2, 2]])
+    >>> np.matmul(a, b)
+    array([[4, 1],
+           [2, 2]])
+
+    For 2-D mixed with 1-D, the result is the usual matrix-vector product.
+
+    >>> a = np.array([[1, 0],
+    ...               [0, 1]])
+    >>> b = np.array([1, 2])
+    >>> np.matmul(a, b)
+    array([1, 2])
+    >>> np.matmul(b, a)
+    array([1, 2])
+
+    Broadcasting is conventional for stacks of arrays:
+
+    >>> a = np.arange(2*2*4).reshape((2, 2, 4))
+    >>> b = np.arange(2*2*4).reshape((2, 4, 2))
+    >>> np.matmul(a, b).shape
+    (2, 2, 2)
+    >>> np.matmul(a, b)[0, 1, 1]
+    98
+    >>> sum(a[0, 1, :] * b[0, :, 1])
+    98
+
+    Vector, vector returns the scalar inner product, but neither argument
+    is complex-conjugated:
+
+    >>> np.matmul([2j, 3j], [2j, 3j])
+    (-13+0j)
+
+    Scalar multiplication raises an error.
+
+    >>> np.matmul([1, 2], 3)
+    Traceback (most recent call last):
+    ...
+    ValueError: Scalar operands are not allowed, use '*' instead
+
+    .. versionadded:: 1.10.0
+    """)
+
 add_newdoc('numpy.core.umath', 'modf',
     """
     Return the fractional and integral parts of an array, element-wise.
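
The `out` contract described in the docstring above, restated as a minimal doctest-style sketch; the upcast and downcast cases mirror the assertions added to test_multiarray.py below (same-kind casting: a float64 result may be written into a complex128 `out`, but not into an int32 one):

    >>> import numpy as np
    >>> a = np.ones((2, 3))
    >>> b = np.ones((3, 4))
    >>> out = np.zeros((2, 4))
    >>> np.matmul(a, b, out=out) is out   # the provided array is returned
    True
    >>> np.matmul(a, b, out=np.zeros((2, 4), dtype=complex)).dtype
    dtype('complex128')
    >>> np.matmul(a, b, out=np.zeros((2, 4), dtype=np.int32))
    Traceback (most recent call last):
    ...
    TypeError: Cannot cast ufunc matmul output from dtype('float64') to dtype('int32') with casting rule 'same_kind'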
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 78963b0aa..d4d5922cd 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -51,7 +51,6 @@ fromiter.__module__ = 'numpy' frompyfunc.__module__ = 'numpy' fromstring.__module__ = 'numpy' geterrobj.__module__ = 'numpy' -matmul.__module__ = 'numpy' may_share_memory.__module__ = 'numpy' nested_iters.__module__ = 'numpy' promote_types.__module__ = 'numpy' diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 23a9e268b..467b590ac 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -732,6 +732,7 @@ def configuration(parent_package='',top_path=None): join('src', 'common', 'cblasfuncs.h'), join('src', 'common', 'lowlevel_strided_loops.h'), join('src', 'common', 'mem_overlap.h'), + join('src', 'common', 'npy_cblas.h'), join('src', 'common', 'npy_config.h'), join('src', 'common', 'npy_ctypes.h'), join('src', 'common', 'npy_extint128.h'), @@ -892,6 +893,8 @@ def configuration(parent_package='',top_path=None): join('src', 'umath', 'simd.inc.src'), join('src', 'umath', 'loops.h.src'), join('src', 'umath', 'loops.c.src'), + join('src', 'umath', 'matmul.h.src'), + join('src', 'umath', 'matmul.c.src'), join('src', 'umath', 'ufunc_object.c'), join('src', 'umath', 'extobj.c'), join('src', 'umath', 'cpuid.c'), diff --git a/numpy/core/src/common/cblasfuncs.c b/numpy/core/src/common/cblasfuncs.c index 6460c5db1..514297940 100644 --- a/numpy/core/src/common/cblasfuncs.c +++ b/numpy/core/src/common/cblasfuncs.c @@ -182,7 +182,7 @@ _select_matrix_shape(PyArrayObject *array) * This also makes sure that the data segment is aligned with * an itemsize address as well by returning one if not true. */ -static int +NPY_NO_EXPORT int _bad_strides(PyArrayObject *ap) { int itemsize = PyArray_ITEMSIZE(ap); diff --git a/numpy/core/src/common/cblasfuncs.h b/numpy/core/src/common/cblasfuncs.h index 66ce4ca5b..78eff25a0 100644 --- a/numpy/core/src/common/cblasfuncs.h +++ b/numpy/core/src/common/cblasfuncs.h @@ -3,5 +3,7 @@ NPY_NO_EXPORT PyObject * cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT int +_bad_strides(PyArrayObject *ap); #endif diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 909a24359..5ccb7f6d6 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -72,7 +72,6 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; ***************************************************************************** */ #include "funcs.inc" -#include "loops.h" #include "umathmodule.h" NPY_NO_EXPORT int initscalarmath(PyObject *); @@ -2318,157 +2317,6 @@ fail: return NULL; } - - -/* - * matmul - * - * Implements the protocol used by the '@' operator defined in PEP 364. - * Not in the NUMPY API at this time, maybe later. - * - * - * in1: Left hand side operand - * in2: Right hand side operand - * out: Either NULL, or an array into which the output should be placed. - * - * Returns NULL on error. 
- */ -static PyObject * -array_matmul(PyObject *NPY_UNUSED(m), PyObject *args, PyObject* kwds) -{ - PyObject *in1, *in2, *out = NULL; - char* kwlist[] = {"a", "b", "out", NULL }; - PyArrayObject *ap1, *ap2, *ret = NULL; - NPY_ORDER order = NPY_KEEPORDER; - NPY_CASTING casting = NPY_SAFE_CASTING; - PyArray_Descr *dtype; - int nd1, nd2, typenum; - char *subscripts; - PyArrayObject *ops[2]; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O:matmul", kwlist, - &in1, &in2, &out)) { - return NULL; - } - - if (out != NULL) { - if (out == Py_None) { - out = NULL; - } - else if (!PyArray_Check(out)) { - PyErr_SetString(PyExc_TypeError, "'out' must be an array"); - return NULL; - } - } - - dtype = PyArray_DescrFromObject(in1, NULL); - dtype = PyArray_DescrFromObject(in2, dtype); - if (dtype == NULL) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Cannot find a common data type."); - } - return NULL; - } - typenum = dtype->type_num; - - if (typenum == NPY_OBJECT) { - /* matmul is not currently implemented for object arrays */ - PyErr_SetString(PyExc_TypeError, - "Object arrays are not currently supported"); - Py_DECREF(dtype); - return NULL; - } - - ap1 = (PyArrayObject *)PyArray_FromAny(in1, dtype, 0, 0, - NPY_ARRAY_ALIGNED, NULL); - if (ap1 == NULL) { - return NULL; - } - - Py_INCREF(dtype); - ap2 = (PyArrayObject *)PyArray_FromAny(in2, dtype, 0, 0, - NPY_ARRAY_ALIGNED, NULL); - if (ap2 == NULL) { - Py_DECREF(ap1); - return NULL; - } - - if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { - /* Scalars are rejected */ - PyErr_SetString(PyExc_ValueError, - "Scalar operands are not allowed, use '*' instead"); - return NULL; - } - - nd1 = PyArray_NDIM(ap1); - nd2 = PyArray_NDIM(ap2); - -#if defined(HAVE_CBLAS) - if (nd1 <= 2 && nd2 <= 2 && - (NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum || - NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) { - return cblas_matrixproduct(typenum, ap1, ap2, (PyArrayObject *)out); - } -#endif - - /* - * Use einsum for the stacked cases. This is a quick implementation - * to avoid setting up the proper iterators. Einsum broadcasts, so - * we need to check dimensions before the call. 
- */ - if (nd1 == 1 && nd2 == 1) { - /* vector vector */ - if (PyArray_DIM(ap1, 0) != PyArray_DIM(ap2, 0)) { - dot_alignment_error(ap1, 0, ap2, 0); - goto fail; - } - subscripts = "i, i"; - } - else if (nd1 == 1) { - /* vector matrix */ - if (PyArray_DIM(ap1, 0) != PyArray_DIM(ap2, nd2 - 2)) { - dot_alignment_error(ap1, 0, ap2, nd2 - 2); - goto fail; - } - subscripts = "i, ...ij"; - } - else if (nd2 == 1) { - /* matrix vector */ - if (PyArray_DIM(ap1, nd1 - 1) != PyArray_DIM(ap2, 0)) { - dot_alignment_error(ap1, nd1 - 1, ap2, 0); - goto fail; - } - subscripts = "...i, i"; - } - else { - /* matrix * matrix */ - if (PyArray_DIM(ap1, nd1 - 1) != PyArray_DIM(ap2, nd2 - 2)) { - dot_alignment_error(ap1, nd1 - 1, ap2, nd2 - 2); - goto fail; - } - subscripts = "...ij, ...jk"; - } - ops[0] = ap1; - ops[1] = ap2; - ret = PyArray_EinsteinSum(subscripts, 2, ops, NULL, order, casting, - (PyArrayObject *)out); - Py_DECREF(ap1); - Py_DECREF(ap2); - - /* If no output was supplied, possibly convert to a scalar */ - if (ret != NULL && out == NULL) { - return PyArray_Return((PyArrayObject *)ret); - } - return (PyObject *)ret; - -fail: - Py_XDECREF(ap1); - Py_XDECREF(ap2); - return NULL; -} - - static int einsum_sub_op_from_str(PyObject *args, PyObject **str_obj, char **subscripts, PyArrayObject **op) @@ -4285,9 +4133,6 @@ static struct PyMethodDef array_module_methods[] = { {"vdot", (PyCFunction)array_vdot, METH_VARARGS | METH_KEYWORDS, NULL}, - {"matmul", - (PyCFunction)array_matmul, - METH_VARARGS | METH_KEYWORDS, NULL}, {"c_einsum", (PyCFunction)array_einsum, METH_VARARGS|METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c index 5ee536d4f..d153a8a64 100644 --- a/numpy/core/src/multiarray/number.c +++ b/numpy/core/src/multiarray/number.c @@ -112,6 +112,7 @@ _PyArray_SetNumericOps(PyObject *dict) SET(minimum); SET(rint); SET(conjugate); + SET(matmul); return 0; } @@ -177,6 +178,7 @@ _PyArray_GetNumericOps(void) GET(minimum); GET(rint); GET(conjugate); + GET(matmul); return dict; fail: @@ -382,14 +384,8 @@ array_divmod(PyArrayObject *m1, PyObject *m2) static PyObject * array_matrix_multiply(PyArrayObject *m1, PyObject *m2) { - static PyObject *matmul = NULL; - - npy_cache_import("numpy.core.multiarray", "matmul", &matmul); - if (matmul == NULL) { - return NULL; - } BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_matrix_multiply, array_matrix_multiply); - return PyArray_GenericBinaryFunction(m1, m2, matmul); + return PyArray_GenericBinaryFunction(m1, m2, n_ops.matmul); } static PyObject * diff --git a/numpy/core/src/multiarray/number.h b/numpy/core/src/multiarray/number.h index fbdfe6f94..33a7cf872 100644 --- a/numpy/core/src/multiarray/number.h +++ b/numpy/core/src/multiarray/number.h @@ -39,6 +39,7 @@ typedef struct { PyObject *minimum; PyObject *rint; PyObject *conjugate; + PyObject *matmul; } NumericOps; extern NPY_NO_EXPORT NumericOps n_ops; diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 0f201b966..2f71c8ae9 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -1104,8 +1104,7 @@ static PyNumberMethods gentype_as_number = { (binaryfunc)gentype_add, /*nb_add*/ (binaryfunc)gentype_subtract, /*nb_subtract*/ (binaryfunc)gentype_multiply, /*nb_multiply*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) (binaryfunc)gentype_divide, /*nb_divide*/ #endif (binaryfunc)gentype_remainder, /*nb_remainder*/ @@ -1121,8 +1120,7 @@ static PyNumberMethods 
gentype_as_number = { (binaryfunc)gentype_and, /*nb_and*/ (binaryfunc)gentype_xor, /*nb_xor*/ (binaryfunc)gentype_or, /*nb_or*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) 0, /*nb_coerce*/ #endif (unaryfunc)gentype_int, /*nb_int*/ @@ -1132,16 +1130,14 @@ static PyNumberMethods gentype_as_number = { (unaryfunc)gentype_long, /*nb_long*/ #endif (unaryfunc)gentype_float, /*nb_float*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) (unaryfunc)gentype_oct, /*nb_oct*/ (unaryfunc)gentype_hex, /*nb_hex*/ #endif 0, /*inplace_add*/ 0, /*inplace_subtract*/ 0, /*inplace_multiply*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) 0, /*inplace_divide*/ #endif 0, /*inplace_remainder*/ @@ -1156,6 +1152,10 @@ static PyNumberMethods gentype_as_number = { 0, /*nb_inplace_floor_divide*/ 0, /*nb_inplace_true_divide*/ (unaryfunc)NULL, /*nb_index*/ +#if PY_VERSION_HEX >= 0x03050000 + 0, /*np_matmul*/ + 0, /*np_inplace_matmul*/ +#endif }; diff --git a/numpy/core/src/umath/matmul.c.src b/numpy/core/src/umath/matmul.c.src new file mode 100644 index 000000000..5f5bd3506 --- /dev/null +++ b/numpy/core/src/umath/matmul.c.src @@ -0,0 +1,403 @@ +/* -*- c -*- */ + +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "Python.h" + +#include "npy_config.h" +#include "numpy/npy_common.h" +#include "numpy/arrayobject.h" +#include "numpy/ufuncobject.h" +#include "numpy/npy_math.h" +#include "numpy/halffloat.h" +#include "lowlevel_strided_loops.h" + +#include "npy_pycompat.h" + +#include "npy_cblas.h" +#include "arraytypes.h" /* For TYPE_dot functions */ +#include <assert.h> + +/* + ***************************************************************************** + ** BASICS ** + ***************************************************************************** + */ + +#if defined(HAVE_CBLAS) +static const npy_cdouble oneD = {1.0, 0.0}, zeroD = {0.0, 0.0}; +static const npy_cfloat oneF = {1.0, 0.0}, zeroF = {0.0, 0.0}; + +/**begin repeat + * + * #name = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + * #ctype = npy_float, npy_double, npy_cfloat, npy_cdouble# + * #type = npy_float, npy_double, npy_cfloat, npy_cdouble# + * #prefix = s, d, c, z# + * #step1 = 1.F, 1., &oneF, &oneD# + * #step0 = 0.F, 0., &zeroF, &zeroD# + */ +NPY_NO_EXPORT void +@name@_gemv(void *ip1, npy_intp is1_m, void *ip2, npy_intp is2_n, void *op, + npy_intp m, npy_intp n) +{ + /* + * Vector matrix multiplication -- Level 2 BLAS + * arguments + * ip1: contiguous data, m*n shape + * ip2: data in c order, n*1 shape + * op: contiguous data in c order, m shape + */ + enum CBLAS_ORDER order; + int lda; + + if (is1_m == sizeof(@type@)) { + order = CblasColMajor; + lda = n; + } + else { + /* If not ColMajor, caller should have ensured we are RowMajor */ + /* will not assert in release mode */ + assert(is1_m == sizeof(@type@) * m); + order = CblasRowMajor; + lda = m; + } + cblas_@prefix@gemv(order, CblasTrans, n, m, @step1@, ip1, lda, ip2, + is2_n / sizeof(@type@), @step0@, op, 1); +} + +NPY_NO_EXPORT void +@name@_matmul_matrixmatrix(void *ip1, npy_intp is1_m, npy_intp is1_n, + void *ip2, npy_intp is2_n, npy_intp is2_p, + void *op, npy_intp m, npy_intp n, npy_intp p) +{ + /* + * matrix matrix multiplication -- Level 3 BLAS + */ + enum CBLAS_ORDER order = CblasRowMajor; + enum CBLAS_TRANSPOSE trans1, trans2; + int M, N, P, lda, ldb; + M = m; + N = n; + P = p; + + if (is1_m == sizeof(@type@)) { + trans1 = CblasTrans; + lda = N > 1 ? 
is1_n / sizeof(@type@) : 1; + } + else { + /* If not ColMajor, caller should have ensured we are RowMajor */ + /* will not assert in release mode */ + assert(is1_n == sizeof(@type@)); + trans1 = CblasNoTrans; + lda = N > 1 ? is1_m / sizeof(@type@) : 1; + } + if (is2_n == sizeof(@type@)) { + trans2 = CblasTrans; + ldb = N > 1 ? is2_p / sizeof(@type@) : 1; + } + else { + /* If not ColMajor, caller should have ensured we are RowMajor */ + /* will not assert in release mode */ + assert(is2_p == sizeof(@type@)); + trans2 = CblasNoTrans; + ldb = P > 1 ? P : 1; + } + /* + * Use syrk if we have a case of a matrix times its transpose. + * Otherwise, use gemm for all other cases. + */ + if ( + (ip1 == ip2) && + (m == p) && + (is1_m == is2_p) && + (is1_n == is2_n) && + (trans1 != trans2) + ) { + npy_intp i,j; + if (trans1 == CblasNoTrans) { + cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@, + ip1, lda, @step0@, op, P); + } + else { + cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@, + ip1, ldb, @step0@, op, P); + } + /* Copy the triangle */ + for (i = 0; i < P; i++) { + for (j = i + 1; j < P; j++) { + ((@type@*)op)[j * P + i] = ((@type@*)op)[i * P + j]; + } + } + + } + else { + cblas_@prefix@gemm(order, trans1, trans2, M, P, N, @step1@, ip1, lda, + ip2, ldb, @step0@, op, P); + } +} + +/**end repeat**/ +#endif + +/* + * matmul loops + * signature is (m?,n),(n,p?)->(m?,p?) + */ + +/**begin repeat + * #TYPE = FLOAT, DOUBLE, HALF# + * #typ = npy_float,npy_double,npy_half# + * #SPECL = 0,0,1# + */ + +NPY_NO_EXPORT void +@TYPE@_matmul_inner_noblas(char *ip1, char *ip2, char *op, + npy_intp dm, npy_intp dn, npy_intp dp, + npy_intp ib1_n, npy_intp ib2_n, npy_intp ib2_p, + npy_intp ob_p, npy_intp is1_m, npy_intp is1_n, + npy_intp is2_n, npy_intp is2_p, npy_intp os_m, + npy_intp os_p) +{ + npy_intp m, n, p; + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { + /* + * Use a double as an intermediate sum, which is natural for + * npy_double, slightly increases the accuracy of npy_float, + * and is perhaps overkill for npy_half. 
+ */ + double sum = 0; + for (n = 0; n < dn; n++) { +#if @SPECL@ == 1 + @typ@ val1 = (*(@typ@ *)ip1); + @typ@ val2 = (*(@typ@ *)ip2); + sum += npy_half_to_float(val1) * npy_half_to_float(val2); +#else + sum += ((double)*(@typ@ *)ip1) * ((double)*(@typ@ *)ip2); +#endif + ip2 += is2_n; + ip1 += is1_n; + } +#if @SPECL@ == 1 + *(@typ@ *)op = npy_float_to_half((float)sum); +#else + /* in the case of double -> float, may produce INF */ + *(@typ@ *)op = (@typ@)sum; +#endif + ip1 -= ib1_n; + ip2 -= ib2_n; + op += os_p; + ip2 += is2_p; + } + op -= ob_p; + ip2 -= ib2_p; + ip1 += is1_m; + op += os_m; + } +} + +/**end repeat**/ + +/**begin repeat + * #TYPE = LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * BOOL# + * #typ = npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_bool# + * #IS_COMPLEX = 0, 1, 1, 1, 0*11# + */ + +NPY_NO_EXPORT void +@TYPE@_matmul_inner_noblas(char *ip1, char *ip2, char *op, + npy_intp dm, npy_intp dn, npy_intp dp, + npy_intp ib1_n, npy_intp ib2_n, npy_intp ib2_p, + npy_intp ob_p, npy_intp is1_m, npy_intp is1_n, + npy_intp is2_n, npy_intp is2_p, npy_intp os_m, + npy_intp os_p) +{ + npy_intp m, n, p; + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { +#if @IS_COMPLEX@ == 1 + (*(@typ@ *)op).real = 0; + (*(@typ@ *)op).imag = 0; +#else + *(@typ@ *)op = 0; +#endif + for (n = 0; n < dn; n++) { + @typ@ val1 = (*(@typ@ *)ip1); + @typ@ val2 = (*(@typ@ *)ip2); +#if @IS_COMPLEX@ == 1 + (*(@typ@ *)op).real += (val1.real * val2.real) - + (val1.imag * val2.imag); + (*(@typ@ *)op).imag += (val1.real * val2.imag) + + (val1.imag * val2.real); +#else + *(@typ@ *)op += val1 * val2; +#endif + ip2 += is2_n; + ip1 += is1_n; + } + ip1 -= ib1_n; + ip2 -= ib2_n; + op += os_p; + ip2 += is2_p; + } + op -= ob_p; + ip2 -= ib2_p; + ip1 += is1_m; + op += os_m; + } +} + +/**end repeat**/ + +/**begin repeat + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE, HALF, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * BOOL# + * #typ = npy_float,npy_double,npy_longdouble, npy_half, + * npy_cfloat, npy_cdouble, npy_clongdouble, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_bool# + * #SPECL = 0, 0, 0, 2, 1, 1, 1, 0*11# + * #USEBLAS = 1, 1, 0, 0, 1, 1, 0*12# + * #chr = s, d, 0, 0, c, z, 0*12# + * #blas_typ = npy_float, npy_double, void*16# + */ + + +NPY_NO_EXPORT void +@TYPE@_matmul(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + npy_intp dOuter = *dimensions++; + npy_intp iOuter; + npy_intp s0 = *steps++; + npy_intp s1 = *steps++; + npy_intp s2 = *steps++; + npy_intp dm = dimensions[0]; + npy_intp dn = dimensions[1]; + npy_intp dp = dimensions[2]; + npy_intp is1_m=steps[0], is1_n=steps[1], is2_n=steps[2], is2_p=steps[3], + os_m=steps[4], os_p=steps[5]; + npy_intp ib1_n, ib2_n, ib2_p, ob_p; +#if @USEBLAS@ & defined(HAVE_CBLAS) + npy_bool special_case = (dm == 1 || dn == 1 || dp == 1); + npy_bool scalar_out = (dm ==1 && dp == 1); + npy_bool scalar_vec = (dn == 1 && (dp == 1 || dm == 1)); + npy_bool too_big_for_blas = (dm > NPY_MAX_INT || dn > NPY_MAX_INT || + dp >= NPY_MAX_INT); + npy_bool input_contiguous = ((is1_m == sizeof(@typ@) || + is1_n == sizeof(@typ@)) && + (is2_n == sizeof(@typ@) || + is2_p == sizeof(@typ@))); + 
npy_bool vector_matrix = ((dm == 1) && + (is2_n == sizeof(@typ@) || (is2_n == sizeof(@typ@) * dp))); + npy_bool matrix_vector = ((dp == 1) && + (is1_n == sizeof(@typ@) || (is1_n == sizeof(@typ@) * dm))); +#endif + + ib1_n = is1_n*dn; + ib2_n = is2_n*dn; + ib2_p = is2_p*dp; + ob_p = os_p *dp; + + if (dn == 0) { + /* No operand, need to zero the output */ + for (iOuter = 0; iOuter < dOuter; iOuter++, + args[0] += s0, args[1] += s1, args[2] += s2) { + npy_intp m, p; + char *op=args[2]; + for (m = 0; m < dm; m++) { + for (p = 0; p < dp; p++) { +#if @SPECL@ == 1 + (*(@typ@ *)op).real = 0; + (*(@typ@ *)op).imag = 0; +#else + *(@typ@ *)op = 0; +#endif + op += os_p; + } + op += os_m - ob_p; + } + } + return; + } + for (iOuter = 0; iOuter < dOuter; iOuter++, + args[0] += s0, args[1] += s1, args[2] += s2) { + char *ip1=args[0], *ip2=args[1], *op=args[2]; +#if @USEBLAS@ & defined(HAVE_CBLAS) + /* + * TODO: refactor this out to a inner_loop_selector, in + * PyUFunc_MatmulLoopSelector. But that call does not have access to + * n, m, p and strides. + */ + if (too_big_for_blas) { + @TYPE@_matmul_inner_noblas(ip1, ip2, op, dm, dn, dp, + ib1_n, ib2_n, ib2_p, ob_p, + is1_m, is1_n, is2_n, is2_p, os_m, os_p); + } + else if (special_case) { + /* Special case variants that have a 1 in the core dimensions */ + if (scalar_out) { + /* row @ column, 1,1 output */ + @TYPE@_dot(ip1, is1_n, ip2, is2_n, op, dn, NULL); + } else if (scalar_vec){ + /* + * 0d @ vector or vector @ 0d + * could use cblas_Xaxy, but that requires 0ing output + * and would not be faster (XXX prove it) + */ + @TYPE@_matmul_inner_noblas(ip1, ip2, op, dm, dn, dp, + ib1_n, ib2_n, ib2_p, ob_p, + is1_m, is1_n, is2_n, is2_p, os_m, os_p); + } else if (vector_matrix) { + /* vector @ matrix, switch ip1, ip2, p and m */ + @TYPE@_gemv((void*)ip2, is2_n, (void*)ip1, is1_n, + (void*)op, dp, dn); + } else if (matrix_vector) { + /* matrix @ vector */ + @TYPE@_gemv((void*)ip1, is1_n, (void*)ip2, is2_n, + (void*)op, dm, dn); + } else { + /* column @ row, 2d output, no blas needed or non-contiguous input */ + @TYPE@_matmul_inner_noblas(ip1, ip2, op, dm, dn, dp, + ib1_n, ib2_n, ib2_p, ob_p, + is1_m, is1_n, is2_n, is2_p, os_m, os_p); + } + } else { + /* matrix @ matrix */ + if (input_contiguous) { + /* can only use blas if input is contiguous */ + @TYPE@_matmul_matrixmatrix((void*)ip1, is1_m, is1_n, + (void*)ip2, is2_n, is2_p, + (void*)op, dm, dn, dp); + } else { + @TYPE@_matmul_inner_noblas(ip1, ip2, op, dm, dn, dp, + ib1_n, ib2_n, ib2_p, ob_p, + is1_m, is1_n, is2_n, is2_p, os_m, os_p); + } + } +#else + @TYPE@_matmul_inner_noblas(ip1, ip2, op, dm, dn, dp, + ib1_n, ib2_n, ib2_p, ob_p, + is1_m, is1_n, is2_n, is2_p, os_m, os_p); + +#endif + } +} + +/**end repeat**/ + + diff --git a/numpy/core/src/umath/matmul.h.src b/numpy/core/src/umath/matmul.h.src new file mode 100644 index 000000000..16be7675b --- /dev/null +++ b/numpy/core/src/umath/matmul.h.src @@ -0,0 +1,12 @@ +/**begin repeat + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE, HALF, + * CFLOAT, CDOUBLE, CLONGDOUBLE, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * BOOL# + **/ +NPY_NO_EXPORT void +@TYPE@_matmul(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); +/**end repeat**/ + + diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index e98d9f865..a7987acda 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -1564,7 +1564,6 @@ static PyObject* } /**end repeat**/ - /**begin 
repeat * #name = byte, ubyte, short, ushort, int, uint, * long, ulong, longlong, ulonglong, @@ -1575,8 +1574,7 @@ static PyNumberMethods @name@_as_number = { (binaryfunc)@name@_add, /*nb_add*/ (binaryfunc)@name@_subtract, /*nb_subtract*/ (binaryfunc)@name@_multiply, /*nb_multiply*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) (binaryfunc)@name@_divide, /*nb_divide*/ #endif (binaryfunc)@name@_remainder, /*nb_remainder*/ @@ -1596,8 +1594,7 @@ static PyNumberMethods @name@_as_number = { (binaryfunc)@name@_and, /*nb_and*/ (binaryfunc)@name@_xor, /*nb_xor*/ (binaryfunc)@name@_or, /*nb_or*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) 0, /*nb_coerce*/ #endif (unaryfunc)@name@_int, /*nb_int*/ @@ -1607,16 +1604,14 @@ static PyNumberMethods @name@_as_number = { (unaryfunc)@name@_long, /*nb_long*/ #endif (unaryfunc)@name@_float, /*nb_float*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) (unaryfunc)@name@_oct, /*nb_oct*/ (unaryfunc)@name@_hex, /*nb_hex*/ #endif 0, /*inplace_add*/ 0, /*inplace_subtract*/ 0, /*inplace_multiply*/ -#if defined(NPY_PY3K) -#else +#if !defined(NPY_PY3K) 0, /*inplace_divide*/ #endif 0, /*inplace_remainder*/ @@ -1631,6 +1626,10 @@ static PyNumberMethods @name@_as_number = { 0, /*nb_inplace_floor_divide*/ 0, /*nb_inplace_true_divide*/ (unaryfunc)NULL, /*nb_index*/ +#if PY_VERSION_HEX >= 0x03050000 + 0, /*nb_matrix_multiply*/ + 0, /*nb_inplace_matrix_multiply*/ +#endif }; /**end repeat**/ diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 1fe8745a0..66f512f7b 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2845,13 +2845,15 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, } /* Fill in any allocated outputs */ - for (i = nin; i < nop; ++i) { - if (op[i] == NULL) { - op[i] = NpyIter_GetOperandArray(iter)[i]; - Py_INCREF(op[i]); + { + PyArrayObject **operands = NpyIter_GetOperandArray(iter); + for (i = 0; i < nop; ++i) { + if (op[i] == NULL) { + op[i] = operands[i]; + Py_INCREF(op[i]); + } } } - /* * Set up the inner strides array. Because we're not doing * buffering, the strides are fixed throughout the looping. diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 6b042d837..efd923972 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -22,6 +22,11 @@ #include "ufunc_object.h" #include "common.h" +#include "mem_overlap.h" +#if defined(HAVE_CBLAS) +#include "cblasfuncs.h" +#endif + static const char * npy_casting_to_string(NPY_CASTING casting) { @@ -1299,6 +1304,88 @@ PyUFunc_MixedDivisionTypeResolver(PyUFuncObject *ufunc, type_tup, out_dtypes); } +/* + * XXX This is too restrictive, we should only check the inner axis involved + * gh-12365 + */ +#define NOT_CONTIGUOUS(a) (PyArray_NDIM(a) >=2 && !( \ + PyArray_IS_C_CONTIGUOUS(a) || PyArray_IS_F_CONTIGUOUS(a))) + +/* + * This function applies special type resolution rules for the case + * where all the functions have the pattern XX->X, using + * PyArray_ResultType instead of a linear search to get the best + * loop, like PyUFunc_SimpleBinaryOperationTypeResolver, and adds + * memory overlap and contiguity considerations to the operands, + * possibly creating Writeback temporary data + * + * Returns 0 on success, -1 on error. 
+ */ +NPY_NO_EXPORT int +PyUFunc_MatmulTypeResolver(PyUFuncObject *ufunc, + NPY_CASTING casting, + PyArrayObject **operands, + PyObject *type_tup, + PyArray_Descr **out_dtypes) +{ + + if (PyUFunc_SimpleBinaryOperationTypeResolver(ufunc, casting, operands, + type_tup, out_dtypes) < 0) { + return -1; + } + if (PyArray_NDIM(operands[0]) >= 2 || PyArray_NDIM(operands[1]) >= 2) { +#if defined(HAVE_CBLAS) + int typenum = out_dtypes[2]->type_num; + if ( (NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum || + NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) { + /* + * We are going to use BLAS + * make sure 2d and more arrays are contiguous + */ + if (_bad_strides(operands[0]) || NOT_CONTIGUOUS(operands[0])) { + PyObject *op = PyArray_NewCopy(operands[0], NPY_ANYORDER); + + if (op == NULL) { + return -1; + } + Py_DECREF(operands[0]); + operands[0] = (PyArrayObject *)op; + } + if (_bad_strides(operands[1]) || NOT_CONTIGUOUS(operands[1])) { + PyObject *op = PyArray_NewCopy(operands[1], NPY_ANYORDER); + + if (op == NULL) { + return -1; + } + Py_DECREF(operands[1]); + operands[1] = (PyArrayObject *)op; + } + } +#endif + if (operands[2] != NULL) { + npy_intp last_stride = PyArray_STRIDE(operands[2], + PyArray_NDIM(operands[2]) - 1); + if (last_stride != PyArray_ITEMSIZE(operands[2])) { + PyErr_SetString(PyExc_ValueError, + "output array is not acceptable (must be C-contiguous)"); + return -1; + } + /* + * Use all the flags in PyUFunc_GeneralizedFunction + * except NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE and + * NPY_ITER_WRITEONLY. Without NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE + * nditer will properly check overlap between output and any input. + */ + ufunc->op_flags[2] = NPY_ITER_WRITEONLY | + NPY_ITER_UPDATEIFCOPY | + NPY_ITER_ALIGNED | + NPY_ITER_ALLOCATE | + NPY_ITER_NO_BROADCAST; + } + } + return 0; +} + static int find_userloop(PyUFuncObject *ufunc, diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h index bb4823d24..5306a5983 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.h +++ b/numpy/core/src/umath/ufunc_type_resolution.h @@ -145,5 +145,11 @@ PyUFunc_DefaultMaskedInnerLoopSelector(PyUFuncObject *ufunc, NpyAuxData **out_innerloopdata, int *out_needs_api); +NPY_NO_EXPORT int +PyUFunc_MatmulTypeResolver(PyUFuncObject *ufunc, + NPY_CASTING casting, + PyArrayObject **operands, + PyObject *type_tup, + PyArray_Descr **out_dtypes); #endif diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 51fe6e9ef..80c69d9b3 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -2595,7 +2595,8 @@ class TestMethods(object): assert_equal(x1.flatten('F'), y1f) assert_equal(x1.flatten('F'), x1.T.flatten()) - def test_dot(self): + @pytest.mark.parametrize('func', (np.dot, np.matmul)) + def test_arr_mult(self, func): a = np.array([[1, 0], [0, 1]]) b = np.array([[0, 1], [1, 0]]) c = np.array([[9, 1], [1, -9]]) @@ -2619,49 +2620,49 @@ class TestMethods(object): # gemm vs syrk optimizations for et in [np.float32, np.float64, np.complex64, np.complex128]: eaf = a.astype(et) - assert_equal(np.dot(eaf, eaf), eaf) - assert_equal(np.dot(eaf.T, eaf), eaf) - assert_equal(np.dot(eaf, eaf.T), eaf) - assert_equal(np.dot(eaf.T, eaf.T), eaf) - assert_equal(np.dot(eaf.T.copy(), eaf), eaf) - assert_equal(np.dot(eaf, eaf.T.copy()), eaf) - assert_equal(np.dot(eaf.T.copy(), eaf.T.copy()), eaf) + assert_equal(func(eaf, eaf), eaf) + assert_equal(func(eaf.T, eaf), eaf) + assert_equal(func(eaf, eaf.T), eaf) + 
assert_equal(func(eaf.T, eaf.T), eaf) + assert_equal(func(eaf.T.copy(), eaf), eaf) + assert_equal(func(eaf, eaf.T.copy()), eaf) + assert_equal(func(eaf.T.copy(), eaf.T.copy()), eaf) # syrk validations for et in [np.float32, np.float64, np.complex64, np.complex128]: eaf = a.astype(et) ebf = b.astype(et) - assert_equal(np.dot(ebf, ebf), eaf) - assert_equal(np.dot(ebf.T, ebf), eaf) - assert_equal(np.dot(ebf, ebf.T), eaf) - assert_equal(np.dot(ebf.T, ebf.T), eaf) + assert_equal(func(ebf, ebf), eaf) + assert_equal(func(ebf.T, ebf), eaf) + assert_equal(func(ebf, ebf.T), eaf) + assert_equal(func(ebf.T, ebf.T), eaf) # syrk - different shape, stride, and view validations for et in [np.float32, np.float64, np.complex64, np.complex128]: edf = d.astype(et) assert_equal( - np.dot(edf[::-1, :], edf.T), - np.dot(edf[::-1, :].copy(), edf.T.copy()) + func(edf[::-1, :], edf.T), + func(edf[::-1, :].copy(), edf.T.copy()) ) assert_equal( - np.dot(edf[:, ::-1], edf.T), - np.dot(edf[:, ::-1].copy(), edf.T.copy()) + func(edf[:, ::-1], edf.T), + func(edf[:, ::-1].copy(), edf.T.copy()) ) assert_equal( - np.dot(edf, edf[::-1, :].T), - np.dot(edf, edf[::-1, :].T.copy()) + func(edf, edf[::-1, :].T), + func(edf, edf[::-1, :].T.copy()) ) assert_equal( - np.dot(edf, edf[:, ::-1].T), - np.dot(edf, edf[:, ::-1].T.copy()) + func(edf, edf[:, ::-1].T), + func(edf, edf[:, ::-1].T.copy()) ) assert_equal( - np.dot(edf[:edf.shape[0] // 2, :], edf[::2, :].T), - np.dot(edf[:edf.shape[0] // 2, :].copy(), edf[::2, :].T.copy()) + func(edf[:edf.shape[0] // 2, :], edf[::2, :].T), + func(edf[:edf.shape[0] // 2, :].copy(), edf[::2, :].T.copy()) ) assert_equal( - np.dot(edf[::2, :], edf[:edf.shape[0] // 2, :].T), - np.dot(edf[::2, :].copy(), edf[:edf.shape[0] // 2, :].T.copy()) + func(edf[::2, :], edf[:edf.shape[0] // 2, :].T), + func(edf[::2, :].copy(), edf[:edf.shape[0] // 2, :].T.copy()) ) # syrk - different shape @@ -2669,9 +2670,13 @@ class TestMethods(object): edf = d.astype(et) eddtf = ddt.astype(et) edtdf = dtd.astype(et) - assert_equal(np.dot(edf, edf.T), eddtf) - assert_equal(np.dot(edf.T, edf), edtdf) + assert_equal(func(edf, edf.T), eddtf) + assert_equal(func(edf.T, edf), edtdf) + def test_dot(self): + a = np.array([[1, 0], [0, 1]]) + b = np.array([[0, 1], [1, 0]]) + c = np.array([[9, 1], [1, -9]]) # function versus methods assert_equal(np.dot(a, b), a.dot(b)) assert_equal(np.dot(np.dot(a, b), c), a.dot(b).dot(c)) @@ -2738,6 +2743,18 @@ class TestMethods(object): assert_raises(NotImplementedError, np.matmul, A(), A()) assert_raises(NotImplementedError, np.inner, A(), A()) + def test_matmul_out(self): + # overlapping memory + a = np.arange(18).reshape(2, 3, 3) + b = np.matmul(a, a) + c = np.matmul(a, a, out=a) + assert_(c is a) + assert_equal(c, b) + a = np.arange(18).reshape(2, 3, 3) + c = np.matmul(a, a, out=a[::-1, ...]) + assert_(c.base is a.base) + assert_equal(c, b) + def test_diagonal(self): a = np.arange(12).reshape((3, 4)) assert_equal(a.diagonal(), [0, 5, 10]) @@ -3147,6 +3164,8 @@ class TestBinop(object): # 'eq': (np.equal, False), # 'ne': (np.not_equal, False), } + if sys.version_info >= (3, 5): + ops['matmul'] = (np.matmul, False, float) class Coerced(Exception): pass @@ -3189,7 +3208,7 @@ class TestBinop(object): if issubclass(MyType, np.ndarray): # Use this range to avoid special case weirdnesses around # divide-by-0, pow(x, 2), overflow due to pow(big, big), etc. 
- return np.arange(3, 5).view(MyType) + return np.arange(3, 7).reshape(2, 2).view(MyType) else: return MyType() @@ -3198,7 +3217,7 @@ class TestBinop(object): for op, (ufunc, has_inplace, dtype) in ops.items(): err_msg = ('op: %s, ufunc: %s, has_inplace: %s, dtype: %s' % (op, ufunc, has_inplace, dtype)) - check_objs = [np.arange(3, 5, dtype=dtype)] + check_objs = [np.arange(3, 7, dtype=dtype).reshape(2, 2)] if check_scalar: check_objs.append(check_objs[0][0]) for arr in check_objs: @@ -5697,13 +5716,36 @@ class MatmulCommon(object): res = self.matmul(v, v) assert_(type(res) is np.dtype(dt).type) - def test_vector_vector_values(self): - vec = np.array([1, 2]) - tgt = 5 + def test_0d_vector_values(self): + vec1 = np.array([2]) + vec2 = np.array([3, 4]).reshape(1, -1) + tgt = np.array([6, 8]) for dt in self.types[1:]: - v1 = vec.astype(dt) - res = self.matmul(v1, v1) + v1 = vec1.astype(dt) + v2 = vec2.astype(dt) + res = self.matmul(v1, v2) assert_equal(res, tgt) + res = self.matmul(v2.T, v1) + assert_equal(res, tgt) + + # boolean type + vec = np.array([True, True], dtype='?').reshape(1, -1) + res = self.matmul(vec[:, 0], vec) + assert_equal(res, True) + + def test_vector_vector_values(self): + vec1 = np.array([1, 2]) + vec2 = np.array([3, 4]).reshape(-1, 1) + tgt1 = np.array([11]) + tgt2 = np.array([[3, 6], [4, 8]]) + for dt in self.types[1:]: + v1 = vec1.astype(dt) + v2 = vec2.astype(dt) + res = self.matmul(v1, v2) + assert_equal(res, tgt1) + # no broadcast, we must make v1 into a 2d ndarray + res = self.matmul(v2, v1.reshape(1, -1)) + assert_equal(res, tgt2) # boolean type vec = np.array([True, True], dtype='?') @@ -5851,27 +5893,52 @@ class TestMatmul(MatmulCommon): assert_array_equal(out, tgt, err_msg=msg) # test out with not allowed type cast (safe casting) - # einsum and cblas raise different error types, so - # use Exception. - msg = "out argument with illegal cast" + msg = "Cannot cast ufunc matmul output" out = np.zeros((2, 2), dtype=np.int32) - assert_raises(Exception, self.matmul, a, b, out=out) + assert_raises_regex(TypeError, msg, self.matmul, a, b, out=out) - # skip following tests for now, cblas does not allow non-contiguous + # cblas does not allow non-contiguous # outputs and consistency with dot would require same type, # dimensions, subtype, and c_contiguous. - # test out with allowed type cast - # msg = "out argument with allowed cast" - # out = np.zeros((2, 2), dtype=np.complex128) - # self.matmul(a, b, out=out) - # assert_array_equal(out, tgt, err_msg=msg) + # test out with type upcast to complex + out = np.zeros((2, 2), dtype=np.complex128) + c = self.matmul(a, b, out=out) + assert_(c is out) + with suppress_warnings() as sup: + sup.filter(np.ComplexWarning, '') + c = c.astype(tgt.dtype) + assert_array_equal(c, tgt) # test out non-contiguous - # msg = "out argument with non-contiguous layout" - # c = np.zeros((2, 2, 2), dtype=float) - # self.matmul(a, b, out=c[..., 0]) - # assert_array_equal(c, tgt, err_msg=msg) + out = np.ones((2, 2, 2), dtype=float) + assert_raises(ValueError, self.matmul, a, b, out=out[..., 0]) + + m1 = np.arange(15.).reshape(5, 3) + m2 = np.arange(21.).reshape(3, 7) + m3 = np.arange(30.).reshape(5, 6)[:, ::2] # non-contiguous + vc = np.arange(10.) + vr = np.arange(6.) 
+    @pytest.mark.parametrize('args', (
+        # matrix-matrix
+        (m1, m2), (m2.T, m1.T), (m2.T.copy(), m1.T), (m2.T, m1.T.copy()),
+        # matrix-matrix-transpose, contiguous and non
+        (m1, m1.T), (m1.T, m1), (m1, m3.T), (m3, m1.T),
+        (m3, m3.T), (m3.T, m3),
+        # matrix-matrix non-contiguous
+        (m3, m2), (m2.T, m3.T), (m2.T.copy(), m3.T),
+        # vector-matrix, matrix-vector, contiguous
+        (m1, vr[:3]), (vc[:5], m1), (m1.T, vc[:5]), (vr[:3], m1.T),
+        # vector-matrix, matrix-vector, vector non-contiguous
+        (m1, vr[::2]), (vc[::2], m1), (m1.T, vc[::2]), (vr[::2], m1.T),
+        # vector-matrix, matrix-vector, matrix non-contiguous
+        (m3, vr[:3]), (vc[:5], m3), (m3.T, vc[:5]), (vr[:3], m3.T),
+        # vector-matrix, matrix-vector, both non-contiguous
+        (m3, vr[::2]), (vc[::2], m3), (m3.T, vc[::2]), (vr[::2], m3.T)))
+    def test_dot_equivalent(self, args):
+        r1 = np.matmul(*args)
+        r2 = np.dot(*args)
+        assert_equal(r1, r2)
 
 
 if sys.version_info[:2] >= (3, 5):
@@ -5895,6 +5962,11 @@
             assert_equal(self.matmul(a, b), "A")
             assert_equal(self.matmul(b, a), "A")
 
+        def test_matmul_raises(self):
+            assert_raises(TypeError, self.matmul, np.int8(5), np.int8(5))
+            assert_raises(TypeError, self.matmul, np.void(b'abc'), np.void(b'abc'))
+            assert_raises(ValueError, self.matmul, np.arange(10), np.void(b'abc'))
+
     def test_matmul_inplace():
         # It would be nice to support in-place matmul eventually, but for now
         # we don't have a working implementation, so better just to error out
@@ -5909,6 +5981,17 @@
         exec_ = getattr(builtins, "exec")
         assert_raises(TypeError, exec_, "a @= b", globals(), locals())
 
+    def test_matmul_axes():
+        a = np.arange(3*4*5).reshape(3, 4, 5)
+        c = np.matmul(a, a, axes=[(-2, -1), (-1, -2), (1, 2)])
+        assert c.shape == (3, 4, 4)
+        d = np.matmul(a, a, axes=[(-2, -1), (-1, -2), (0, 1)])
+        assert d.shape == (4, 4, 3)
+        e = np.swapaxes(d, 0, 2)
+        assert_array_equal(e, c)
+        f = np.matmul(a, np.arange(3), axes=[(1, 0), (0), (0)])
+        assert f.shape == (4, 5)
+
 
 class TestInner(object):
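
A note on the new `axes` test above: each entry of `axes` says where that operand's core dimensions live, in signature order, so passing `(-1, -2)` for the second operand makes matmul consume it transposed. A minimal sketch (assuming a build of this branch, since the `axes` keyword only applies once matmul is a gufunc):

    >>> import numpy as np
    >>> a = np.arange(3*4*5).reshape(3, 4, 5)
    >>> c = np.matmul(a, a, axes=[(-2, -1), (-1, -2), (1, 2)])
    >>> c.shape
    (3, 4, 4)
    >>> np.array_equal(c, a @ a.swapaxes(-2, -1))
    True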