/* * This file contains the implementation of the 'einsum' function, * which provides an einstein-summation operation. * * Copyright (c) 2011 by Mark Wiebe (mwwiebe@gmail.com) * The University of British Columbia * * See LICENSE.txt for the license. */ #define PY_SSIZE_T_CLEAN #include #include #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE #include #include #include #include //PyArray_AssignRawScalar #include #include "convert.h" #include "common.h" #include "ctors.h" #include "einsum_sumprod.h" #include "einsum_debug.h" /* * Parses the subscripts for one operand into an output of 'ndim' * labels. The resulting 'op_labels' array will have: * - the ASCII code of the label for the first occurrence of a label; * - the (negative) offset to the first occurrence of the label for * repeated labels; * - zero for broadcast dimensions, if subscripts has an ellipsis. * For example: * - subscripts="abbcbc", ndim=6 -> op_labels=[97, 98, -1, 99, -3, -2] * - subscripts="ab...bc", ndim=6 -> op_labels=[97, 98, 0, 0, -3, 99] */ static int parse_operand_subscripts(char *subscripts, int length, int ndim, int iop, char *op_labels, char *label_counts, int *min_label, int *max_label) { int i; int idim = 0; int ellipsis = -1; /* Process all labels for this operand */ for (i = 0; i < length; ++i) { int label = subscripts[i]; /* A proper label for an axis. */ if (label > 0 && isalpha(label)) { /* Check we don't exceed the operator dimensions. */ if (idim >= ndim) { PyErr_Format(PyExc_ValueError, "einstein sum subscripts string contains " "too many subscripts for operand %d", iop); return -1; } op_labels[idim++] = label; if (label < *min_label) { *min_label = label; } if (label > *max_label) { *max_label = label; } label_counts[label]++; } /* The beginning of the ellipsis. */ else if (label == '.') { /* Check it's a proper ellipsis. */ if (ellipsis != -1 || i + 2 >= length || subscripts[++i] != '.' || subscripts[++i] != '.') { PyErr_Format(PyExc_ValueError, "einstein sum subscripts string contains a " "'.' that is not part of an ellipsis ('...') " "in operand %d", iop); return -1; } ellipsis = idim; } else if (label != ' ') { PyErr_Format(PyExc_ValueError, "invalid subscript '%c' in einstein sum " "subscripts string, subscripts must " "be letters", (char)label); return -1; } } /* No ellipsis found, labels must match dimensions exactly. */ if (ellipsis == -1) { if (idim != ndim) { PyErr_Format(PyExc_ValueError, "operand has more dimensions than subscripts " "given in einstein sum, but no '...' ellipsis " "provided to broadcast the extra dimensions."); return -1; } } /* Ellipsis found, may have to add broadcast dimensions. */ else if (idim < ndim) { /* Move labels after ellipsis to the end. */ for (i = 0; i < idim - ellipsis; ++i) { op_labels[ndim - i - 1] = op_labels[idim - i - 1]; } /* Set all broadcast dimensions to zero. */ for (i = 0; i < ndim - idim; ++i) { op_labels[ellipsis + i] = 0; } } /* * Find any labels duplicated for this operand, and turn them * into negative offsets to the axis to merge with. * * In C, the char type may be signed or unsigned, but with * twos complement arithmetic the char is ok either way here, and * later where it matters the char is cast to a signed char. */ for (idim = 0; idim < ndim - 1; ++idim) { int label = (signed char)op_labels[idim]; /* If it is a proper label, find any duplicates of it. */ if (label > 0) { /* Search for the next matching label. */ char *next = memchr(op_labels + idim + 1, label, ndim - idim - 1); while (next != NULL) { /* The offset from next to op_labels[idim] (negative). */ *next = (char)((op_labels + idim) - next); /* Search for the next matching label. */ next = memchr(next + 1, label, op_labels + ndim - 1 - next); } } } return 0; } /* * Parses the subscripts for the output operand into an output that * includes 'ndim_broadcast' unlabeled dimensions, and returns the total * number of output dimensions, or -1 if there is an error. Similarly * to parse_operand_subscripts, the 'out_labels' array will have, for * each dimension: * - the ASCII code of the corresponding label; * - zero for broadcast dimensions, if subscripts has an ellipsis. */ static int parse_output_subscripts(char *subscripts, int length, int ndim_broadcast, const char *label_counts, char *out_labels) { int i, bdim; int ndim = 0; int ellipsis = 0; /* Process all the output labels. */ for (i = 0; i < length; ++i) { int label = subscripts[i]; /* A proper label for an axis. */ if (label > 0 && isalpha(label)) { /* Check that it doesn't occur again. */ if (memchr(subscripts + i + 1, label, length - i - 1) != NULL) { PyErr_Format(PyExc_ValueError, "einstein sum subscripts string includes " "output subscript '%c' multiple times", (char)label); return -1; } /* Check that it was used in the inputs. */ if (label_counts[label] == 0) { PyErr_Format(PyExc_ValueError, "einstein sum subscripts string included " "output subscript '%c' which never appeared " "in an input", (char)label); return -1; } /* Check that there is room in out_labels for this label. */ if (ndim >= NPY_MAXDIMS) { PyErr_Format(PyExc_ValueError, "einstein sum subscripts string contains " "too many subscripts in the output"); return -1; } out_labels[ndim++] = label; } /* The beginning of the ellipsis. */ else if (label == '.') { /* Check it is a proper ellipsis. */ if (ellipsis || i + 2 >= length || subscripts[++i] != '.' || subscripts[++i] != '.') { PyErr_SetString(PyExc_ValueError, "einstein sum subscripts string " "contains a '.' that is not part of " "an ellipsis ('...') in the output"); return -1; } /* Check there is room in out_labels for broadcast dims. */ if (ndim + ndim_broadcast > NPY_MAXDIMS) { PyErr_Format(PyExc_ValueError, "einstein sum subscripts string contains " "too many subscripts in the output"); return -1; } ellipsis = 1; for (bdim = 0; bdim < ndim_broadcast; ++bdim) { out_labels[ndim++] = 0; } } else if (label != ' ') { PyErr_Format(PyExc_ValueError, "invalid subscript '%c' in einstein sum " "subscripts string, subscripts must " "be letters", (char)label); return -1; } } /* If no ellipsis was found there should be no broadcast dimensions. */ if (!ellipsis && ndim_broadcast > 0) { PyErr_SetString(PyExc_ValueError, "output has more dimensions than subscripts " "given in einstein sum, but no '...' ellipsis " "provided to broadcast the extra dimensions."); return -1; } return ndim; } /* * When there's just one operand and no reduction we can return a view * into 'op'. This calculates the view and stores it in 'ret', if * possible. Returns -1 on error, 0 otherwise. Note that a 0 return * does not mean that a view was successfully created. */ static int get_single_op_view(PyArrayObject *op, char *labels, int ndim_output, char *output_labels, PyArrayObject **ret) { npy_intp new_strides[NPY_MAXDIMS]; npy_intp new_dims[NPY_MAXDIMS]; char *out_label; int label, i, idim, ndim, ibroadcast = 0; ndim = PyArray_NDIM(op); /* Initialize the dimensions and strides to zero */ for (idim = 0; idim < ndim_output; ++idim) { new_dims[idim] = 0; new_strides[idim] = 0; } /* Match the labels in the operand with the output labels */ for (idim = 0; idim < ndim; ++idim) { /* * The char type may be either signed or unsigned, we * need it to be signed here. */ label = (signed char)labels[idim]; /* If this label says to merge axes, get the actual label */ if (label < 0) { label = labels[idim+label]; } /* If the label is 0, it's an unlabeled broadcast dimension */ if (label == 0) { /* The next output label that's a broadcast dimension */ for (; ibroadcast < ndim_output; ++ibroadcast) { if (output_labels[ibroadcast] == 0) { break; } } if (ibroadcast == ndim_output) { PyErr_SetString(PyExc_ValueError, "output had too few broadcast dimensions"); return -1; } new_dims[ibroadcast] = PyArray_DIM(op, idim); new_strides[ibroadcast] = PyArray_STRIDE(op, idim); ++ibroadcast; } else { /* Find the position for this dimension in the output */ out_label = (char *)memchr(output_labels, label, ndim_output); /* If it's not found, reduction -> can't return a view */ if (out_label == NULL) { break; } /* Update the dimensions and strides of the output */ i = out_label - output_labels; if (new_dims[i] != 0 && new_dims[i] != PyArray_DIM(op, idim)) { PyErr_Format(PyExc_ValueError, "dimensions in single operand for collapsing " "index '%c' don't match (%d != %d)", label, (int)new_dims[i], (int)PyArray_DIM(op, idim)); return -1; } new_dims[i] = PyArray_DIM(op, idim); new_strides[i] += PyArray_STRIDE(op, idim); } } /* If we processed all the input axes, return a view */ if (idim == ndim) { Py_INCREF(PyArray_DESCR(op)); *ret = (PyArrayObject *)PyArray_NewFromDescr_int( Py_TYPE(op), PyArray_DESCR(op), ndim_output, new_dims, new_strides, PyArray_DATA(op), PyArray_ISWRITEABLE(op) ? NPY_ARRAY_WRITEABLE : 0, (PyObject *)op, (PyObject *)op, 0); if (*ret == NULL) { return -1; } return 0; } /* Return success, but that we couldn't make a view */ *ret = NULL; return 0; } /* * The char type may be either signed or unsigned, we need it to be * signed here. */ static int _any_labels_are_negative(signed char *labels, int ndim) { int idim; for (idim = 0; idim < ndim; ++idim) { if (labels[idim] < 0) { return 1; } } return 0; } /* * Given the labels for an operand array, returns a view of the array * with all repeated labels collapsed into a single dimension along * the corresponding diagonal. The labels are also updated to match * the dimensions of the new array. If no label is repeated, the * original array is reference increased and returned unchanged. */ static PyArrayObject * get_combined_dims_view(PyArrayObject *op, int iop, char *labels) { npy_intp new_strides[NPY_MAXDIMS]; npy_intp new_dims[NPY_MAXDIMS]; int idim, icombine; int icombinemap[NPY_MAXDIMS]; int ndim = PyArray_NDIM(op); PyArrayObject *ret = NULL; /* A fast path to avoid unnecessary calculations. */ if (!_any_labels_are_negative((signed char *)labels, ndim)) { Py_INCREF(op); return op; } /* Combine repeated labels. */ icombine = 0; for(idim = 0; idim < ndim; ++idim) { /* * The char type may be either signed or unsigned, we * need it to be signed here. */ int label = (signed char)labels[idim]; npy_intp dim = PyArray_DIM(op, idim); npy_intp stride = PyArray_STRIDE(op, idim); /* A label seen for the first time, add it to the op view. */ if (label >= 0) { /* * icombinemap maps dimensions in the original array to * their position in the combined dimensions view. */ icombinemap[idim] = icombine; new_dims[icombine] = dim; new_strides[icombine] = stride; ++icombine; } /* A repeated label, find the original one and merge them. */ else { #ifdef __GNUC__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wuninitialized" #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif int i = icombinemap[idim + label]; icombinemap[idim] = -1; if (new_dims[i] != dim) { char orig_label = labels[idim + label]; PyErr_Format(PyExc_ValueError, "dimensions in operand %d for collapsing " "index '%c' don't match (%d != %d)", iop, orig_label, (int)new_dims[i], (int)dim); return NULL; } new_strides[i] += stride; #ifdef __GNUC__ #pragma GCC diagnostic pop #endif } } /* Overwrite labels to match the new operand view. */ for (idim = 0; idim < ndim; ++idim) { int i = icombinemap[idim]; if (i >= 0) { labels[i] = labels[idim]; } } /* The number of dimensions of the combined view. */ ndim = icombine; /* Create a view of the operand with the compressed dimensions. */ Py_INCREF(PyArray_DESCR(op)); ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( Py_TYPE(op), PyArray_DESCR(op), ndim, new_dims, new_strides, PyArray_DATA(op), PyArray_ISWRITEABLE(op) ? NPY_ARRAY_WRITEABLE : 0, (PyObject *)op, (PyObject *)op); return ret; } static int prepare_op_axes(int ndim, int iop, char *labels, int *axes, int ndim_iter, char *iter_labels) { int i, label, ibroadcast; ibroadcast = ndim-1; for (i = ndim_iter-1; i >= 0; --i) { label = iter_labels[i]; /* * If it's an unlabeled broadcast dimension, choose * the next broadcast dimension from the operand. */ if (label == 0) { while (ibroadcast >= 0 && labels[ibroadcast] != 0) { --ibroadcast; } /* * If we used up all the operand broadcast dimensions, * extend it with a "newaxis" */ if (ibroadcast < 0) { axes[i] = -1; } /* Otherwise map to the broadcast axis */ else { axes[i] = ibroadcast; --ibroadcast; } } /* It's a labeled dimension, find the matching one */ else { char *match = memchr(labels, label, ndim); /* If the op doesn't have the label, broadcast it */ if (match == NULL) { axes[i] = -1; } /* Otherwise use it */ else { axes[i] = match - labels; } } } return 0; } static int unbuffered_loop_nop1_ndim2(NpyIter *iter) { npy_intp coord, shape[2], strides[2][2]; char *ptrs[2][2], *ptr; sum_of_products_fn sop; NPY_BEGIN_THREADS_DEF; #if NPY_EINSUM_DBG_TRACING NpyIter_DebugPrint(iter); #endif NPY_EINSUM_DBG_PRINT("running hand-coded 1-op 2-dim loop\n"); NpyIter_GetShape(iter, shape); memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), 2*sizeof(npy_intp)); memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), 2*sizeof(npy_intp)); memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), 2*sizeof(char *)); memcpy(ptrs[1], ptrs[0], 2*sizeof(char*)); sop = get_sum_of_products_function(1, NpyIter_GetDescrArray(iter)[0]->type_num, NpyIter_GetDescrArray(iter)[0]->elsize, strides[0]); if (sop == NULL) { PyErr_SetString(PyExc_TypeError, "invalid data type for einsum"); return -1; } /* * Since the iterator wasn't tracking coordinates, the * loop provided by the iterator is in Fortran-order. */ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[1] * shape[0]); } for (coord = shape[1]; coord > 0; --coord) { sop(1, ptrs[0], strides[0], shape[0]); if (needs_api && PyErr_Occurred()){ return -1; } ptr = ptrs[1][0] + strides[1][0]; ptrs[0][0] = ptrs[1][0] = ptr; ptr = ptrs[1][1] + strides[1][1]; ptrs[0][1] = ptrs[1][1] = ptr; } NPY_END_THREADS; return 0; } static int unbuffered_loop_nop1_ndim3(NpyIter *iter) { npy_intp coords[2], shape[3], strides[3][2]; char *ptrs[3][2], *ptr; sum_of_products_fn sop; NPY_BEGIN_THREADS_DEF; #if NPY_EINSUM_DBG_TRACING NpyIter_DebugPrint(iter); #endif NPY_EINSUM_DBG_PRINT("running hand-coded 1-op 3-dim loop\n"); NpyIter_GetShape(iter, shape); memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), 2*sizeof(npy_intp)); memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), 2*sizeof(npy_intp)); memcpy(strides[2], NpyIter_GetAxisStrideArray(iter, 2), 2*sizeof(npy_intp)); memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), 2*sizeof(char *)); memcpy(ptrs[1], ptrs[0], 2*sizeof(char*)); memcpy(ptrs[2], ptrs[0], 2*sizeof(char*)); sop = get_sum_of_products_function(1, NpyIter_GetDescrArray(iter)[0]->type_num, NpyIter_GetDescrArray(iter)[0]->elsize, strides[0]); if (sop == NULL) { PyErr_SetString(PyExc_TypeError, "invalid data type for einsum"); return -1; } /* * Since the iterator wasn't tracking coordinates, the * loop provided by the iterator is in Fortran-order. */ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[2] * shape[1] * shape[0]); } for (coords[1] = shape[2]; coords[1] > 0; --coords[1]) { for (coords[0] = shape[1]; coords[0] > 0; --coords[0]) { sop(1, ptrs[0], strides[0], shape[0]); if (needs_api && PyErr_Occurred()){ return -1; } ptr = ptrs[1][0] + strides[1][0]; ptrs[0][0] = ptrs[1][0] = ptr; ptr = ptrs[1][1] + strides[1][1]; ptrs[0][1] = ptrs[1][1] = ptr; } ptr = ptrs[2][0] + strides[2][0]; ptrs[0][0] = ptrs[1][0] = ptrs[2][0] = ptr; ptr = ptrs[2][1] + strides[2][1]; ptrs[0][1] = ptrs[1][1] = ptrs[2][1] = ptr; } NPY_END_THREADS; return 0; } static int unbuffered_loop_nop2_ndim2(NpyIter *iter) { npy_intp coord, shape[2], strides[2][3]; char *ptrs[2][3], *ptr; sum_of_products_fn sop; NPY_BEGIN_THREADS_DEF; #if NPY_EINSUM_DBG_TRACING NpyIter_DebugPrint(iter); #endif NPY_EINSUM_DBG_PRINT("running hand-coded 2-op 2-dim loop\n"); NpyIter_GetShape(iter, shape); memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), 3*sizeof(npy_intp)); memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), 3*sizeof(npy_intp)); memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), 3*sizeof(char *)); memcpy(ptrs[1], ptrs[0], 3*sizeof(char*)); sop = get_sum_of_products_function(2, NpyIter_GetDescrArray(iter)[0]->type_num, NpyIter_GetDescrArray(iter)[0]->elsize, strides[0]); if (sop == NULL) { PyErr_SetString(PyExc_TypeError, "invalid data type for einsum"); return -1; } /* * Since the iterator wasn't tracking coordinates, the * loop provided by the iterator is in Fortran-order. */ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[1] * shape[0]); } for (coord = shape[1]; coord > 0; --coord) { sop(2, ptrs[0], strides[0], shape[0]); if(needs_api && PyErr_Occurred()){ return -1; } ptr = ptrs[1][0] + strides[1][0]; ptrs[0][0] = ptrs[1][0] = ptr; ptr = ptrs[1][1] + strides[1][1]; ptrs[0][1] = ptrs[1][1] = ptr; ptr = ptrs[1][2] + strides[1][2]; ptrs[0][2] = ptrs[1][2] = ptr; } NPY_END_THREADS; return 0; } static int unbuffered_loop_nop2_ndim3(NpyIter *iter) { npy_intp coords[2], shape[3], strides[3][3]; char *ptrs[3][3], *ptr; sum_of_products_fn sop; NPY_BEGIN_THREADS_DEF; #if NPY_EINSUM_DBG_TRACING NpyIter_DebugPrint(iter); #endif NPY_EINSUM_DBG_PRINT("running hand-coded 2-op 3-dim loop\n"); NpyIter_GetShape(iter, shape); memcpy(strides[0], NpyIter_GetAxisStrideArray(iter, 0), 3*sizeof(npy_intp)); memcpy(strides[1], NpyIter_GetAxisStrideArray(iter, 1), 3*sizeof(npy_intp)); memcpy(strides[2], NpyIter_GetAxisStrideArray(iter, 2), 3*sizeof(npy_intp)); memcpy(ptrs[0], NpyIter_GetInitialDataPtrArray(iter), 3*sizeof(char *)); memcpy(ptrs[1], ptrs[0], 3*sizeof(char*)); memcpy(ptrs[2], ptrs[0], 3*sizeof(char*)); sop = get_sum_of_products_function(2, NpyIter_GetDescrArray(iter)[0]->type_num, NpyIter_GetDescrArray(iter)[0]->elsize, strides[0]); if (sop == NULL) { PyErr_SetString(PyExc_TypeError, "invalid data type for einsum"); return -1; } /* * Since the iterator wasn't tracking coordinates, the * loop provided by the iterator is in Fortran-order. */ int needs_api = NpyIter_IterationNeedsAPI(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(shape[2] * shape[1] * shape[0]); } for (coords[1] = shape[2]; coords[1] > 0; --coords[1]) { for (coords[0] = shape[1]; coords[0] > 0; --coords[0]) { sop(2, ptrs[0], strides[0], shape[0]); if(needs_api && PyErr_Occurred()){ return -1; } ptr = ptrs[1][0] + strides[1][0]; ptrs[0][0] = ptrs[1][0] = ptr; ptr = ptrs[1][1] + strides[1][1]; ptrs[0][1] = ptrs[1][1] = ptr; ptr = ptrs[1][2] + strides[1][2]; ptrs[0][2] = ptrs[1][2] = ptr; } ptr = ptrs[2][0] + strides[2][0]; ptrs[0][0] = ptrs[1][0] = ptrs[2][0] = ptr; ptr = ptrs[2][1] + strides[2][1]; ptrs[0][1] = ptrs[1][1] = ptrs[2][1] = ptr; ptr = ptrs[2][2] + strides[2][2]; ptrs[0][2] = ptrs[1][2] = ptrs[2][2] = ptr; } NPY_END_THREADS; return 0; } /*NUMPY_API * This function provides summation of array elements according to * the Einstein summation convention. For example: * - trace(a) -> einsum("ii", a) * - transpose(a) -> einsum("ji", a) * - multiply(a,b) -> einsum(",", a, b) * - inner(a,b) -> einsum("i,i", a, b) * - outer(a,b) -> einsum("i,j", a, b) * - matvec(a,b) -> einsum("ij,j", a, b) * - matmat(a,b) -> einsum("ij,jk", a, b) * * subscripts: The string of subscripts for einstein summation. * nop: The number of operands * op_in: The array of operands * dtype: Either NULL, or the data type to force the calculation as. * order: The order for the calculation/the output axes. * casting: What kind of casts should be permitted. * out: Either NULL, or an array into which the output should be placed. * * By default, the labels get placed in alphabetical order * at the end of the output. So, if c = einsum("i,j", a, b) * then c[i,j] == a[i]*b[j], but if c = einsum("j,i", a, b) * then c[i,j] = a[j]*b[i]. * * Alternatively, you can control the output order or prevent * an axis from being summed/force an axis to be summed by providing * indices for the output. This allows us to turn 'trace' into * 'diag', for example. * - diag(a) -> einsum("ii->i", a) * - sum(a, axis=0) -> einsum("i...->", a) * * Subscripts at the beginning and end may be specified by * putting an ellipsis "..." in the middle. For example, * the function einsum("i...i", a) takes the diagonal of * the first and last dimensions of the operand, and * einsum("ij...,jk...->ik...") takes the matrix product using * the first two indices of each operand instead of the last two. * * When there is only one operand, no axes being summed, and * no output parameter, this function returns a view * into the operand instead of making a copy. */ NPY_NO_EXPORT PyArrayObject * PyArray_EinsteinSum(char *subscripts, npy_intp nop, PyArrayObject **op_in, PyArray_Descr *dtype, NPY_ORDER order, NPY_CASTING casting, PyArrayObject *out) { int iop, label, min_label = 127, max_label = 0; char label_counts[128]; char op_labels[NPY_MAXARGS][NPY_MAXDIMS]; char output_labels[NPY_MAXDIMS], *iter_labels; int idim, ndim_output, ndim_broadcast, ndim_iter; PyArrayObject *op[NPY_MAXARGS], *ret = NULL; PyArray_Descr *op_dtypes_array[NPY_MAXARGS], **op_dtypes; int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS]; int *op_axes[NPY_MAXARGS]; npy_uint32 iter_flags, op_flags[NPY_MAXARGS]; NpyIter *iter; sum_of_products_fn sop; npy_intp fixed_strides[NPY_MAXARGS]; /* nop+1 (+1 is for the output) must fit in NPY_MAXARGS */ if (nop >= NPY_MAXARGS) { PyErr_SetString(PyExc_ValueError, "too many operands provided to einstein sum function"); return NULL; } else if (nop < 1) { PyErr_SetString(PyExc_ValueError, "not enough operands provided to einstein sum function"); return NULL; } /* Parse the subscripts string into label_counts and op_labels */ memset(label_counts, 0, sizeof(label_counts)); for (iop = 0; iop < nop; ++iop) { int length = (int)strcspn(subscripts, ",-"); if (iop == nop-1 && subscripts[length] == ',') { PyErr_SetString(PyExc_ValueError, "more operands provided to einstein sum function " "than specified in the subscripts string"); return NULL; } else if(iop < nop-1 && subscripts[length] != ',') { PyErr_SetString(PyExc_ValueError, "fewer operands provided to einstein sum function " "than specified in the subscripts string"); return NULL; } if (parse_operand_subscripts(subscripts, length, PyArray_NDIM(op_in[iop]), iop, op_labels[iop], label_counts, &min_label, &max_label) < 0) { return NULL; } /* Move subscripts to the start of the labels for the next op */ subscripts += length; if (iop < nop-1) { subscripts++; } } /* * Find the number of broadcast dimensions, which is the maximum * number of labels == 0 in an op_labels array. */ ndim_broadcast = 0; for (iop = 0; iop < nop; ++iop) { npy_intp count_zeros = 0; int ndim; char *labels = op_labels[iop]; ndim = PyArray_NDIM(op_in[iop]); for (idim = 0; idim < ndim; ++idim) { if (labels[idim] == 0) { ++count_zeros; } } if (count_zeros > ndim_broadcast) { ndim_broadcast = count_zeros; } } /* * If there is no output signature, fill output_labels and ndim_output * using each label that appeared once, in alphabetical order. */ if (subscripts[0] == '\0') { /* If no output was specified, always broadcast left, as usual. */ for (ndim_output = 0; ndim_output < ndim_broadcast; ++ndim_output) { output_labels[ndim_output] = 0; } for (label = min_label; label <= max_label; ++label) { if (label_counts[label] == 1) { if (ndim_output < NPY_MAXDIMS) { output_labels[ndim_output++] = label; } else { PyErr_SetString(PyExc_ValueError, "einstein sum subscript string has too many " "distinct labels"); return NULL; } } } } else { if (subscripts[0] != '-' || subscripts[1] != '>') { PyErr_SetString(PyExc_ValueError, "einstein sum subscript string does not " "contain proper '->' output specified"); return NULL; } subscripts += 2; /* Parse the output subscript string. */ ndim_output = parse_output_subscripts(subscripts, strlen(subscripts), ndim_broadcast, label_counts, output_labels); if (ndim_output < 0) { return NULL; } } if (out != NULL && PyArray_NDIM(out) != ndim_output) { PyErr_Format(PyExc_ValueError, "out parameter does not have the correct number of " "dimensions, has %d but should have %d", (int)PyArray_NDIM(out), (int)ndim_output); return NULL; } /* * If there's just one operand and no output parameter, * first try remapping the axes to the output to return * a view instead of a copy. */ if (nop == 1 && out == NULL) { ret = NULL; if (get_single_op_view(op_in[0], op_labels[0], ndim_output, output_labels, &ret) < 0) { return NULL; } if (ret != NULL) { return ret; } } /* Set all the op references to NULL */ for (iop = 0; iop < nop; ++iop) { op[iop] = NULL; } /* * Process all the input ops, combining dimensions into their * diagonal where specified. */ for (iop = 0; iop < nop; ++iop) { char *labels = op_labels[iop]; op[iop] = get_combined_dims_view(op_in[iop], iop, labels); if (op[iop] == NULL) { goto fail; } } /* Set the output op */ op[nop] = out; /* * Set up the labels for the iterator (output + combined labels). * Can just share the output_labels memory, because iter_labels * is output_labels with some more labels appended. */ iter_labels = output_labels; ndim_iter = ndim_output; for (label = min_label; label <= max_label; ++label) { if (label_counts[label] > 0 && memchr(output_labels, label, ndim_output) == NULL) { if (ndim_iter >= NPY_MAXDIMS) { PyErr_SetString(PyExc_ValueError, "too many subscripts in einsum"); goto fail; } iter_labels[ndim_iter++] = label; } } /* Set up the op_axes for the iterator */ for (iop = 0; iop < nop; ++iop) { op_axes[iop] = op_axes_arrays[iop]; if (prepare_op_axes(PyArray_NDIM(op[iop]), iop, op_labels[iop], op_axes[iop], ndim_iter, iter_labels) < 0) { goto fail; } } /* Set up the op_dtypes if dtype was provided */ if (dtype == NULL) { op_dtypes = NULL; } else { op_dtypes = op_dtypes_array; for (iop = 0; iop <= nop; ++iop) { op_dtypes[iop] = dtype; } } /* Set the op_axes for the output */ op_axes[nop] = op_axes_arrays[nop]; for (idim = 0; idim < ndim_output; ++idim) { op_axes[nop][idim] = idim; } for (idim = ndim_output; idim < ndim_iter; ++idim) { op_axes[nop][idim] = NPY_ITER_REDUCTION_AXIS(-1); } /* Set the iterator per-op flags */ for (iop = 0; iop < nop; ++iop) { op_flags[iop] = NPY_ITER_READONLY| NPY_ITER_NBO| NPY_ITER_ALIGNED; } op_flags[nop] = NPY_ITER_READWRITE| NPY_ITER_NBO| NPY_ITER_ALIGNED| NPY_ITER_ALLOCATE; iter_flags = NPY_ITER_EXTERNAL_LOOP| NPY_ITER_BUFFERED| NPY_ITER_DELAY_BUFALLOC| NPY_ITER_GROWINNER| NPY_ITER_REFS_OK| NPY_ITER_ZEROSIZE_OK; if (out != NULL) { iter_flags |= NPY_ITER_COPY_IF_OVERLAP; } if (dtype == NULL) { iter_flags |= NPY_ITER_COMMON_DTYPE; } /* Allocate the iterator */ iter = NpyIter_AdvancedNew(nop+1, op, iter_flags, order, casting, op_flags, op_dtypes, ndim_iter, op_axes, NULL, 0); if (iter == NULL) { goto fail; } /* Initialize the output to all zeros or None*/ ret = NpyIter_GetOperandArray(iter)[nop]; if (PyArray_AssignZero(ret, NULL) < 0) { goto fail; } /***************************/ /* * Acceleration for some specific loop structures. Note * that with axis coalescing, inputs with more dimensions can * be reduced to fit into these patterns. */ if (!NpyIter_RequiresBuffering(iter)) { int ndim = NpyIter_GetNDim(iter); switch (nop) { case 1: if (ndim == 2) { if (unbuffered_loop_nop1_ndim2(iter) < 0) { goto fail; } goto finish; } else if (ndim == 3) { if (unbuffered_loop_nop1_ndim3(iter) < 0) { goto fail; } goto finish; } break; case 2: if (ndim == 2) { if (unbuffered_loop_nop2_ndim2(iter) < 0) { goto fail; } goto finish; } else if (ndim == 3) { if (unbuffered_loop_nop2_ndim3(iter) < 0) { goto fail; } goto finish; } break; } } /***************************/ if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) { goto fail; } /* * Get an inner loop function, specializing it based on * the strides that are fixed for the whole loop. */ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); sop = get_sum_of_products_function(nop, NpyIter_GetDescrArray(iter)[0]->type_num, NpyIter_GetDescrArray(iter)[0]->elsize, fixed_strides); #if NPY_EINSUM_DBG_TRACING NpyIter_DebugPrint(iter); #endif /* Finally, the main loop */ if (sop == NULL) { PyErr_SetString(PyExc_TypeError, "invalid data type for einsum"); } else if (NpyIter_GetIterSize(iter) != 0) { NpyIter_IterNextFunc *iternext; char **dataptr; npy_intp *stride; npy_intp *countptr; int needs_api; NPY_BEGIN_THREADS_DEF; iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { NpyIter_Deallocate(iter); goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); stride = NpyIter_GetInnerStrideArray(iter); countptr = NpyIter_GetInnerLoopSizePtr(iter); needs_api = NpyIter_IterationNeedsAPI(iter); NPY_BEGIN_THREADS_NDITER(iter); NPY_EINSUM_DBG_PRINT("Einsum loop\n"); do { sop(nop, dataptr, stride, *countptr); } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); NPY_END_THREADS; /* If the API was needed, it may have thrown an error */ if (NpyIter_IterationNeedsAPI(iter) && PyErr_Occurred()) { goto fail; } } finish: if (out != NULL) { ret = out; } Py_INCREF(ret); NpyIter_Deallocate(iter); for (iop = 0; iop < nop; ++iop) { Py_DECREF(op[iop]); } return ret; fail: for (iop = 0; iop < nop; ++iop) { Py_XDECREF(op[iop]); } return NULL; }