/*
 * Python Universal Functions Object -- Math for all types, plus fast
 * arrays math
 *
 * Full description
 *
 * This supports mathematical (and Boolean) functions on arrays and other
 * python objects.  Math on large arrays of basic C types is rather efficient.
 *
 * Travis E. Oliphant  2005, 2006 oliphant@ee.byu.edu (oliphant.travis@ieee.org)
 * Brigham Young University
 *
 * based on the
 *
 * Original Implementation:
 * Copyright (c) 1995, 1996, 1997 Jim Hugunin, hugunin@mit.edu
 *
 * with inspiration and code from
 * Numarray
 * Space Science Telescope Institute
 * J. Todd Miller
 * Perry Greenfield
 * Rick White
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#define _UMATHMODULE

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stddef.h>

#include "npy_config.h"
#include "npy_pycompat.h"
#include "npy_argparse.h"

#include "numpy/arrayobject.h"
#include "numpy/ufuncobject.h"
#include "numpy/arrayscalars.h"
#include "lowlevel_strided_loops.h"
#include "ufunc_type_resolution.h"
#include "reduction.h"
#include "mem_overlap.h"
#include "npy_hashtable.h"

#include "ufunc_object.h"
#include "override.h"
#include "npy_import.h"
#include "extobj.h"

#include "arrayobject.h"
#include "common.h"
#include "ctors.h"
#include "dtypemeta.h"
#include "numpyos.h"
#include "dispatching.h"
#include "convert_datatype.h"
#include "legacy_array_method.h"
#include "abstractdtypes.h"

/* TODO: Only for `NpyIter_GetTransferFlags` until it is public */
#define NPY_ITERATOR_IMPLEMENTATION_CODE
#include "nditer_impl.h"

/********** PRINTF DEBUG TRACING **************/
#define NPY_UF_DBG_TRACING 0

#if NPY_UF_DBG_TRACING
#define NPY_UF_DBG_PRINT(s) {printf("%s", s);fflush(stdout);}
#define NPY_UF_DBG_PRINT1(s, p1) {printf((s), (p1));fflush(stdout);}
#define NPY_UF_DBG_PRINT2(s, p1, p2) {printf(s, p1, p2);fflush(stdout);}
#define NPY_UF_DBG_PRINT3(s, p1, p2, p3) {printf(s, p1, p2, p3);fflush(stdout);}
#else
#define NPY_UF_DBG_PRINT(s)
#define NPY_UF_DBG_PRINT1(s, p1)
#define NPY_UF_DBG_PRINT2(s, p1, p2)
#define NPY_UF_DBG_PRINT3(s, p1, p2, p3)
#endif
/**********************************************/

typedef struct {
    PyObject *in;   /* The input arguments to the ufunc, a tuple */
    PyObject *out;  /* The output arguments, a tuple.  If no non-None outputs
                       are provided, then this is NULL. */
} ufunc_full_args;

/* C representation of the context argument to __array_wrap__ */
typedef struct {
    PyUFuncObject *ufunc;
    ufunc_full_args args;
    int out_i;
} _ufunc_context;

/* Get the arg tuple to pass in the context argument to __array_wrap__ and
 * __array_prepare__.
 *
 * Output arguments are only passed if at least one is non-None.
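 *
 * Illustrative example (added for clarity, not part of the original
 * comment): for a Python-level call `np.add(a, b, out=c)`, `full_args.in`
 * is `(a, b)` and `full_args.out` is `(c,)`, so the args tuple placed in
 * the wrap/prepare context is `(a, b, c)`.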
 */
static PyObject *
_get_wrap_prepare_args(ufunc_full_args full_args) {
    if (full_args.out == NULL) {
        Py_INCREF(full_args.in);
        return full_args.in;
    }
    else {
        return PySequence_Concat(full_args.in, full_args.out);
    }
}

/* ---------------------------------------------------------------- */

static PyObject *
prepare_input_arguments_for_outer(PyObject *args, PyUFuncObject *ufunc);

static int
resolve_descriptors(int nop,
        PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl,
        PyArrayObject *operands[], PyArray_Descr *dtypes[],
        PyArray_DTypeMeta *signature[], NPY_CASTING casting);


/*UFUNC_API*/
NPY_NO_EXPORT int
PyUFunc_getfperr(void)
{
    /*
     * non-clearing get was only added in 1.9, so this function always
     * cleared; keep it that way in case third-party code relied on the
     * clearing.
     */
    char param = 0;
    return npy_clear_floatstatus_barrier(&param);
}


#define HANDLEIT(NAME, str) {if (retstatus & NPY_FPE_##NAME) {          \
            handle = errmask & UFUNC_MASK_##NAME;                       \
            if (handle &&                                               \
                _error_handler(handle >> UFUNC_SHIFT_##NAME,            \
                               errobj, str, retstatus, first) < 0)      \
                return -1;                                              \
        }}


/*UFUNC_API*/
NPY_NO_EXPORT int
PyUFunc_handlefperr(int errmask, PyObject *errobj, int retstatus, int *first)
{
    int handle;
    if (errmask && retstatus) {
        HANDLEIT(DIVIDEBYZERO, "divide by zero");
        HANDLEIT(OVERFLOW, "overflow");
        HANDLEIT(UNDERFLOW, "underflow");
        HANDLEIT(INVALID, "invalid value");
    }
    return 0;
}

#undef HANDLEIT


/*UFUNC_API*/
NPY_NO_EXPORT int
PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
{
    /* clearing is done for backward compatibility */
    int retstatus;
    retstatus = npy_clear_floatstatus_barrier((char*)&retstatus);
    return PyUFunc_handlefperr(errmask, errobj, retstatus, first);
}


/* Checking the status flag clears it */
/*UFUNC_API*/
NPY_NO_EXPORT void
PyUFunc_clearfperr()
{
    char param = 0;
    npy_clear_floatstatus_barrier(&param);
}

/*
 * This function analyzes the input arguments and determines an appropriate
 * method (__array_prepare__ or __array_wrap__) function to call, taking it
 * from the input with the highest priority.  Return NULL if no argument
 * defines the method.
 */
static PyObject*
_find_array_method(PyObject *args, PyObject *method_name)
{
    int i, n_methods;
    PyObject *obj;
    PyObject *with_method[NPY_MAXARGS], *methods[NPY_MAXARGS];
    PyObject *method = NULL;

    n_methods = 0;
    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        obj = PyTuple_GET_ITEM(args, i);
        if (PyArray_CheckExact(obj) || PyArray_IsAnyScalar(obj)) {
            continue;
        }
        method = PyObject_GetAttr(obj, method_name);
        if (method) {
            if (PyCallable_Check(method)) {
                with_method[n_methods] = obj;
                methods[n_methods] = method;
                ++n_methods;
            }
            else {
                Py_DECREF(method);
                method = NULL;
            }
        }
        else {
            PyErr_Clear();
        }
    }
    if (n_methods > 0) {
        /* If we have some methods defined, find the one of highest priority */
        method = methods[0];
        if (n_methods > 1) {
            double maxpriority = PyArray_GetPriority(with_method[0],
                                                     NPY_PRIORITY);
            for (i = 1; i < n_methods; ++i) {
                double priority = PyArray_GetPriority(with_method[i],
                                                      NPY_PRIORITY);
                if (priority > maxpriority) {
                    maxpriority = priority;
                    Py_DECREF(method);
                    method = methods[i];
                }
                else {
                    Py_DECREF(methods[i]);
                }
            }
        }
    }
    return method;
}

/*
 * Returns an incref'ed pointer to the proper __array_prepare__/__array_wrap__
 * method for a ufunc output argument, given the output argument `obj`, and the
 * method chosen from the inputs `input_method`.
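 *
 * Illustrative behavior (added for clarity): if the output is a plain
 * ndarray, Py_None is returned so that no wrap/prepare is called at all;
 * if the output is e.g. a subclass with a callable `__array_wrap__`, that
 * method is preferred over `input_method`.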
 */
static PyObject *
_get_output_array_method(PyObject *obj, PyObject *method,
                         PyObject *input_method) {
    if (obj != Py_None) {
        PyObject *ometh;

        if (PyArray_CheckExact(obj)) {
            /*
             * No need to wrap regular arrays - None signals to not call
             * wrap/prepare at all
             */
            Py_RETURN_NONE;
        }

        ometh = PyObject_GetAttr(obj, method);
        if (ometh == NULL) {
            PyErr_Clear();
        }
        else if (!PyCallable_Check(ometh)) {
            Py_DECREF(ometh);
        }
        else {
            /* Use the wrap/prepare method of the output if it's callable */
            return ometh;
        }
    }

    /* Fall back on the input's wrap/prepare */
    Py_XINCREF(input_method);
    return input_method;
}

/*
 * This function analyzes the input arguments
 * and determines an appropriate __array_prepare__ function to call
 * for the outputs.
 *
 * If an output argument is provided, then it is prepped
 * with its own __array_prepare__ not with the one determined by
 * the input arguments.
 *
 * if the provided output argument is already an ndarray,
 * the prepping function is None (which means no prepping will
 * be done --- not even PyArray_Return).
 *
 * A NULL is placed in output_prep for outputs that
 * should just have PyArray_Return called.
 */
static void
_find_array_prepare(ufunc_full_args args,
                    PyObject **output_prep, int nout)
{
    int i;
    PyObject *prep;

    /*
     * Determine the prepping function given by the input arrays
     * (could be NULL).
     */
    prep = _find_array_method(args.in, npy_um_str_array_prepare);
    /*
     * For all the output arrays decide what to do.
     *
     * 1) Use the prep function determined from the input arrays
     *    This is the default if the output array is not
     *    passed in.
     *
     * 2) Use the __array_prepare__ method of the output object.
     *    This is special cased for
     *    exact ndarray so that no PyArray_Return is
     *    done in that case.
     */
    if (args.out == NULL) {
        for (i = 0; i < nout; i++) {
            Py_XINCREF(prep);
            output_prep[i] = prep;
        }
    }
    else {
        for (i = 0; i < nout; i++) {
            output_prep[i] = _get_output_array_method(
                    PyTuple_GET_ITEM(args.out, i),
                    npy_um_str_array_prepare, prep);
        }
    }
    Py_XDECREF(prep);
    return;
}

#define NPY_UFUNC_DEFAULT_INPUT_FLAGS \
    NPY_ITER_READONLY | \
    NPY_ITER_ALIGNED | \
    NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE

#define NPY_UFUNC_DEFAULT_OUTPUT_FLAGS \
    NPY_ITER_ALIGNED | \
    NPY_ITER_ALLOCATE | \
    NPY_ITER_NO_BROADCAST | \
    NPY_ITER_NO_SUBTYPE | \
    NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE

/* Called at module initialization to set the matmul ufunc output flags */
NPY_NO_EXPORT int
set_matmul_flags(PyObject *d)
{
    PyObject *matmul = _PyDict_GetItemStringWithError(d, "matmul");
    if (matmul == NULL) {
        return -1;
    }
    /*
     * The default output flag NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE allows
     * perfectly overlapping input and output (in-place operations). While
     * correct for the common mathematical operations, this assumption is
     * incorrect in the general case and specifically in the case of matmul.
     *
     * NPY_ITER_UPDATEIFCOPY is added by default in
     * PyUFunc_GeneralizedFunction, which is the variant called for gufuncs
     * with a signature
     *
     * Enabling NPY_ITER_WRITEONLY can prevent a copy in some cases.
     */
    ((PyUFuncObject *)matmul)->op_flags[2] = (NPY_ITER_WRITEONLY |
                                         NPY_ITER_UPDATEIFCOPY |
                                         NPY_UFUNC_DEFAULT_OUTPUT_FLAGS) &
                                         ~NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
    return 0;
}

/*
 * Set per-operand flags according to desired input or output flags.
 * op_flags[i] for i in input (as determined by ufunc->nin) will be
 * merged with op_in_flags, perhaps overriding per-operand flags set
 * in previous stages.
 * op_flags[i] for i in output will be set to op_out_flags only if previously
 * unset.
 * The input flag behavior preserves backward compatibility, while the
 * output flag behaviour is the "correct" one for maximum flexibility.
 */
NPY_NO_EXPORT void
_ufunc_setup_flags(PyUFuncObject *ufunc, npy_uint32 op_in_flags,
                   npy_uint32 op_out_flags, npy_uint32 *op_flags)
{
    int nin = ufunc->nin;
    int nout = ufunc->nout;
    int nop = nin + nout, i;
    /* Set up the flags */
    for (i = 0; i < nin; ++i) {
        op_flags[i] = ufunc->op_flags[i] | op_in_flags;
        /*
         * If READWRITE flag has been set for this operand,
         * then clear default READONLY flag
         */
        if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) {
            op_flags[i] &= ~NPY_ITER_READONLY;
        }
    }
    for (i = nin; i < nop; ++i) {
        op_flags[i] = ufunc->op_flags[i] ? ufunc->op_flags[i] : op_out_flags;
    }
}

/*
 * This function analyzes the input arguments
 * and determines an appropriate __array_wrap__ function to call
 * for the outputs.
 *
 * If an output argument is provided, then it is wrapped
 * with its own __array_wrap__ not with the one determined by
 * the input arguments.
 *
 * if the provided output argument is already an array,
 * the wrapping function is None (which means no wrapping will
 * be done --- not even PyArray_Return).
 *
 * A NULL is placed in output_wrap for outputs that
 * should just have PyArray_Return called.
 */
static void
_find_array_wrap(ufunc_full_args args, npy_bool subok,
                 PyObject **output_wrap, int nin, int nout)
{
    int i;
    PyObject *wrap = NULL;

    /*
     * If a 'subok' parameter is passed and isn't True, don't wrap but put None
     * into slots with out arguments which means return the out argument
     */
    if (!subok) {
        goto handle_out;
    }

    /*
     * Determine the wrapping function given by the input arrays
     * (could be NULL).
     */
    wrap = _find_array_method(args.in, npy_um_str_array_wrap);

    /*
     * For all the output arrays decide what to do.
     *
     * 1) Use the wrap function determined from the input arrays
     *    This is the default if the output array is not
     *    passed in.
     *
     * 2) Use the __array_wrap__ method of the output object
     *    passed in. -- this is special cased for
     *    exact ndarray so that no PyArray_Return is
     *    done in that case.
     */
handle_out:
    if (args.out == NULL) {
        for (i = 0; i < nout; i++) {
            Py_XINCREF(wrap);
            output_wrap[i] = wrap;
        }
    }
    else {
        for (i = 0; i < nout; i++) {
            output_wrap[i] = _get_output_array_method(
                    PyTuple_GET_ITEM(args.out, i), npy_um_str_array_wrap,
                    wrap);
        }
    }

    Py_XDECREF(wrap);
}


/*
 * Apply the __array_wrap__ function with the given array and content.
 *
 * Interprets wrap=None and wrap=NULL as intended by _find_array_wrap
 *
 * Steals a reference to obj and wrap.
 * Pass context=NULL to indicate there is no context.
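 *
 * Illustrative call pattern (added for clarity): for a subclass output this
 * effectively tries `wrap(obj, (ufunc, args, out_i))` first and falls back
 * to `wrap(obj)` if the wrapper does not accept the context argument.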
 */
static PyObject *
_apply_array_wrap(
        PyObject *wrap, PyArrayObject *obj, _ufunc_context const *context)
{
    if (wrap == NULL) {
        /* default behavior */
        return PyArray_Return(obj);
    }
    else if (wrap == Py_None) {
        Py_DECREF(wrap);
        return (PyObject *)obj;
    }
    else {
        PyObject *res;
        PyObject *py_context = NULL;

        /* Convert the context object to a tuple, if present */
        if (context == NULL) {
            py_context = Py_None;
            Py_INCREF(py_context);
        }
        else {
            PyObject *args_tup;
            /* Call the method with appropriate context */
            args_tup = _get_wrap_prepare_args(context->args);
            if (args_tup == NULL) {
                goto fail;
            }
            py_context = Py_BuildValue("OOi",
                    context->ufunc, args_tup, context->out_i);
            Py_DECREF(args_tup);
            if (py_context == NULL) {
                goto fail;
            }
        }
        /* try __array_wrap__(obj, context) */
        res = PyObject_CallFunctionObjArgs(wrap, obj, py_context, NULL);
        Py_DECREF(py_context);

        /* try __array_wrap__(obj) if the context argument is not accepted */
        if (res == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
            PyErr_Clear();
            res = PyObject_CallFunctionObjArgs(wrap, obj, NULL);
        }
        Py_DECREF(wrap);
        Py_DECREF(obj);
        return res;
    fail:
        Py_DECREF(wrap);
        Py_DECREF(obj);
        return NULL;
    }
}


/*UFUNC_API
 *
 * On return, if errobj is populated with a non-NULL value, the caller
 * owns a new reference to errobj.
 */
NPY_NO_EXPORT int
PyUFunc_GetPyValues(char *name, int *bufsize, int *errmask, PyObject **errobj)
{
    PyObject *ref = get_global_ext_obj();

    return _extract_pyvals(ref, name, bufsize, errmask, errobj);
}

/* Return the position of next non-white-space char in the string */
static int
_next_non_white_space(const char* str, int offset)
{
    int ret = offset;
    while (str[ret] == ' ' || str[ret] == '\t') {
        ret++;
    }
    return ret;
}

static int
_is_alpha_underscore(char ch)
{
    return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_';
}

static int
_is_alnum_underscore(char ch)
{
    return _is_alpha_underscore(ch) || (ch >= '0' && ch <= '9');
}

/*
 * Convert a string into a number
 */
static npy_intp
_get_size(const char* str)
{
    char *stop;
    npy_longlong size = NumPyOS_strtoll(str, &stop, 10);

    if (stop == str || _is_alpha_underscore(*stop)) {
        /* not a well formed number */
        return -1;
    }
    if (size >= NPY_MAX_INTP || size <= NPY_MIN_INTP) {
        /* len(str) too long */
        return -1;
    }
    return size;
}

/*
 * Return the ending position of a variable name including optional modifier
 */
static int
_get_end_of_name(const char* str, int offset)
{
    int ret = offset;
    while (_is_alnum_underscore(str[ret])) {
        ret++;
    }
    if (str[ret] == '?') {
        ret++;
    }
    return ret;
}

/*
 * Returns 1 if the dimension names pointed by s1 and s2 are the same,
 * otherwise returns 0.
 */
static int
_is_same_name(const char* s1, const char* s2)
{
    while (_is_alnum_underscore(*s1) && _is_alnum_underscore(*s2)) {
        if (*s1 != *s2) {
            return 0;
        }
        s1++;
        s2++;
    }
    return !_is_alnum_underscore(*s1) && !_is_alnum_underscore(*s2);
}

/*
 * Sets the following fields in the PyUFuncObject 'ufunc':
 *
 * Field            Type                     Array Length
 * core_enabled     int (effectively bool)   N/A
 * core_num_dim_ix  int                      N/A
 * core_dim_flags   npy_uint32 *             core_num_dim_ix
 * core_dim_sizes   npy_intp *               core_num_dim_ix
 * core_num_dims    int *                    nargs (i.e. nin+nout)
 * core_offsets     int *                    nargs
 * core_dim_ixs     int *                    sum(core_num_dims)
 * core_signature   char *                   strlen(signature) + 1
 *
 * The function assumes that the values that are arrays have not
 * been set already, and sets these pointers to memory allocated
 * with PyArray_malloc.  These are freed when the ufunc dealloc
 * method is called.
 *
 * Returns 0 unless an error occurred.
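 *
 * Example signatures (added for illustration): "(i,j),(j,k)->(i,k)" is the
 * matmul-style signature; "(i),(i)->()" is an inner-product style
 * signature; "(3),(3)->(3)" uses a frozen size; and "(n?,k),(k,m?)->(n?,m?)"
 * uses '?' to mark flexible (ignorable) dimensions.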
 */
static int
_parse_signature(PyUFuncObject *ufunc, const char *signature)
{
    size_t len;
    char const **var_names;
    int nd = 0;             /* number of dimensions of the current argument */
    int cur_arg = 0;        /* index into core_num_dims & core_offsets */
    int cur_core_dim = 0;   /* index into core_dim_ixs */
    int i = 0;
    char *parse_error = NULL;

    if (signature == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "_parse_signature with NULL signature");
        return -1;
    }
    len = strlen(signature);
    ufunc->core_signature = PyArray_malloc(sizeof(char) * (len+1));
    if (ufunc->core_signature) {
        strcpy(ufunc->core_signature, signature);
    }
    /* Allocate sufficient memory to store pointers to all dimension names */
    var_names = PyArray_malloc(sizeof(char const*) * len);
    if (var_names == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    ufunc->core_enabled = 1;
    ufunc->core_num_dim_ix = 0;
    ufunc->core_num_dims = PyArray_malloc(sizeof(int) * ufunc->nargs);
    ufunc->core_offsets = PyArray_malloc(sizeof(int) * ufunc->nargs);
    /* The next three items will be shrunk later */
    ufunc->core_dim_ixs = PyArray_malloc(sizeof(int) * len);
    ufunc->core_dim_sizes = PyArray_malloc(sizeof(npy_intp) * len);
    ufunc->core_dim_flags = PyArray_malloc(sizeof(npy_uint32) * len);

    if (ufunc->core_num_dims == NULL || ufunc->core_dim_ixs == NULL ||
            ufunc->core_offsets == NULL ||
            ufunc->core_dim_sizes == NULL ||
            ufunc->core_dim_flags == NULL) {
        PyErr_NoMemory();
        goto fail;
    }
    for (size_t j = 0; j < len; j++) {
        ufunc->core_dim_flags[j] = 0;
    }

    i = _next_non_white_space(signature, 0);
    while (signature[i] != '\0') {
        /* loop over input/output arguments */
        if (cur_arg == ufunc->nin) {
            /* expect "->" */
            if (signature[i] != '-' || signature[i+1] != '>') {
                parse_error = "expect '->'";
                goto fail;
            }
            i = _next_non_white_space(signature, i + 2);
        }

        /*
         * parse core dimensions of one argument,
         * e.g. "()", "(i)", or "(i,j)"
         */
        if (signature[i] != '(') {
            parse_error = "expect '('";
            goto fail;
        }
        i = _next_non_white_space(signature, i + 1);
        while (signature[i] != ')') {
            /* loop over core dimensions */
            int ix, i_end;
            npy_intp frozen_size;
            npy_bool can_ignore;

            if (signature[i] == '\0') {
                parse_error = "unexpected end of signature string";
                goto fail;
            }
            /*
             * Is this a variable or a fixed size dimension?
             */
            if (_is_alpha_underscore(signature[i])) {
                frozen_size = -1;
            }
            else {
                frozen_size = (npy_intp)_get_size(signature + i);
                if (frozen_size <= 0) {
                    parse_error = "expect dimension name or non-zero frozen size";
                    goto fail;
                }
            }
            /* Is this dimension flexible? */
            i_end = _get_end_of_name(signature, i);
            can_ignore = (i_end > 0 && signature[i_end - 1] == '?');
            /*
             * Determine whether we already saw this dimension name,
             * get its index, and set its properties
             */
            for (ix = 0; ix < ufunc->core_num_dim_ix; ix++) {
                if (frozen_size > 0 ?
                        frozen_size == ufunc->core_dim_sizes[ix] :
                        _is_same_name(signature + i, var_names[ix])) {
                    break;
                }
            }
            /*
             * If a new dimension, store its properties; if old, check
             * consistency.
             */
            if (ix == ufunc->core_num_dim_ix) {
                ufunc->core_num_dim_ix++;
                var_names[ix] = signature + i;
                ufunc->core_dim_sizes[ix] = frozen_size;
                if (frozen_size < 0) {
                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_SIZE_INFERRED;
                }
                if (can_ignore) {
                    ufunc->core_dim_flags[ix] |= UFUNC_CORE_DIM_CAN_IGNORE;
                }
            }
            else {
                if (can_ignore && !(ufunc->core_dim_flags[ix] &
                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
                    parse_error = "? cannot be used, name already seen without ?";
                    goto fail;
                }
                if (!can_ignore && (ufunc->core_dim_flags[ix] &
                                    UFUNC_CORE_DIM_CAN_IGNORE)) {
                    parse_error = "? must be used, name already seen with ?";
                    goto fail;
                }
            }
            ufunc->core_dim_ixs[cur_core_dim] = ix;
            cur_core_dim++;
            nd++;
            i = _next_non_white_space(signature, i_end);
            if (signature[i] != ',' && signature[i] != ')') {
                parse_error = "expect ',' or ')'";
                goto fail;
            }
            if (signature[i] == ',') {
                i = _next_non_white_space(signature, i + 1);
                if (signature[i] == ')') {
                    parse_error = "',' must not be followed by ')'";
                    goto fail;
                }
            }
        }
        ufunc->core_num_dims[cur_arg] = nd;
        ufunc->core_offsets[cur_arg] = cur_core_dim-nd;
        cur_arg++;
        nd = 0;

        i = _next_non_white_space(signature, i + 1);
        if (cur_arg != ufunc->nin && cur_arg != ufunc->nargs) {
            /*
             * The list of input arguments (or output arguments) was
             * only read partially
             */
            if (signature[i] != ',') {
                parse_error = "expect ','";
                goto fail;
            }
            i = _next_non_white_space(signature, i + 1);
        }
    }
    if (cur_arg != ufunc->nargs) {
        parse_error = "incomplete signature: not all arguments found";
        goto fail;
    }
    ufunc->core_dim_ixs = PyArray_realloc(ufunc->core_dim_ixs,
                                          sizeof(int) * cur_core_dim);
    ufunc->core_dim_sizes = PyArray_realloc(
            ufunc->core_dim_sizes,
            sizeof(npy_intp) * ufunc->core_num_dim_ix);
    ufunc->core_dim_flags = PyArray_realloc(
            ufunc->core_dim_flags,
            sizeof(npy_uint32) * ufunc->core_num_dim_ix);

    /* check for trivial core-signature, e.g. "(),()->()" */
    if (cur_core_dim == 0) {
        ufunc->core_enabled = 0;
    }
    PyArray_free((void*)var_names);
    return 0;

fail:
    PyArray_free((void*)var_names);
    if (parse_error) {
        PyErr_Format(PyExc_ValueError,
                     "%s at position %d in \"%s\"",
                     parse_error, i, signature);
    }
    return -1;
}

/*
 * Checks if 'obj' is a valid output array for a ufunc, i.e. it is
 * either None or a writeable array, increments its reference count
 * and stores a pointer to it in 'store'.  Returns 0 on success, sets
 * an exception and returns -1 on failure.
 */
static int
_set_out_array(PyObject *obj, PyArrayObject **store)
{
    if (obj == Py_None) {
        /* Translate None to NULL */
        return 0;
    }
    if (PyArray_Check(obj)) {
        /* If it's an array, store it */
        if (PyArray_FailUnlessWriteable((PyArrayObject *)obj,
                                        "output array") < 0) {
            return -1;
        }
        Py_INCREF(obj);
        *store = (PyArrayObject *)obj;

        return 0;
    }
    PyErr_SetString(PyExc_TypeError, "return arrays must be of ArrayType");

    return -1;
}

/********* GENERIC UFUNC USING ITERATOR *********/

/*
 * Produce a name for the ufunc, if one is not already set
 * This is used in the PyUFunc_handlefperr machinery, and in error messages
 */
NPY_NO_EXPORT const char*
ufunc_get_name_cstr(PyUFuncObject *ufunc) {
    return ufunc->name ? ufunc->name : "<unnamed ufunc>";
}

/*
 * Converters for use in parsing of keywords arguments.
 */
static int
_subok_converter(PyObject *obj, npy_bool *subok)
{
    if (PyBool_Check(obj)) {
        *subok = (obj == Py_True);
        return NPY_SUCCEED;
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                        "'subok' must be a boolean");
        return NPY_FAIL;
    }
}

static int
_keepdims_converter(PyObject *obj, int *keepdims)
{
    if (PyBool_Check(obj)) {
        *keepdims = (obj == Py_True);
        return NPY_SUCCEED;
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                        "'keepdims' must be a boolean");
        return NPY_FAIL;
    }
}

static int
_wheremask_converter(PyObject *obj, PyArrayObject **wheremask)
{
    /*
     * Optimization: where=True is the same as no where argument.
     * This lets us document True as the default.
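     *
     * Illustrative Python-level use (added for clarity):
     * `np.add(a, b, out=c, where=mask)` computes only the elements of `c`
     * for which `mask` is True; `where=True` behaves as if no mask was
     * given.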
     */
    if (obj == Py_True) {
        return NPY_SUCCEED;
    }
    else {
        PyArray_Descr *dtype = PyArray_DescrFromType(NPY_BOOL);
        if (dtype == NULL) {
            return NPY_FAIL;
        }
        /* PyArray_FromAny steals reference to dtype, even on failure */
        *wheremask = (PyArrayObject *)PyArray_FromAny(obj, dtype, 0, 0, 0,
                                                      NULL);
        if ((*wheremask) == NULL) {
            return NPY_FAIL;
        }
        return NPY_SUCCEED;
    }
}

/*
 * Due to the array override, do the actual parameter conversion
 * only in this step.  This function takes the reference objects and
 * parses them into the desired values.
 * This function cleans up after itself and NULLs references on error,
 * however, the caller has to ensure that `out_op[0:nargs]` and
 * `out_wheremask` are NULL initialized.
 */
static int
convert_ufunc_arguments(PyUFuncObject *ufunc,
        ufunc_full_args full_args, PyArrayObject *out_op[],
        PyArray_DTypeMeta *out_op_DTypes[],
        npy_bool *force_legacy_promotion, npy_bool *allow_legacy_promotion,
        npy_bool *promoting_pyscalars,
        PyObject *order_obj, NPY_ORDER *out_order,
        PyObject *casting_obj, NPY_CASTING *out_casting,
        PyObject *subok_obj, npy_bool *out_subok,
        PyObject *where_obj, PyArrayObject **out_wheremask,  /* PyArray of bool */
        PyObject *keepdims_obj, int *out_keepdims)
{
    int nin = ufunc->nin;
    int nout = ufunc->nout;
    int nop = ufunc->nargs;
    PyObject *obj;

    /* Convert and fill in input arguments */
    npy_bool all_scalar = NPY_TRUE;
    npy_bool any_scalar = NPY_FALSE;
    *allow_legacy_promotion = NPY_TRUE;
    *force_legacy_promotion = NPY_FALSE;
    *promoting_pyscalars = NPY_FALSE;
    for (int i = 0; i < nin; i++) {
        obj = PyTuple_GET_ITEM(full_args.in, i);

        if (PyArray_Check(obj)) {
            out_op[i] = (PyArrayObject *)obj;
            Py_INCREF(out_op[i]);
        }
        else {
            /* Convert the input to an array and check for special cases */
            out_op[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, NULL);
            if (out_op[i] == NULL) {
                goto fail;
            }
        }
        out_op_DTypes[i] = NPY_DTYPE(PyArray_DESCR(out_op[i]));
        Py_INCREF(out_op_DTypes[i]);

        if (!NPY_DT_is_legacy(out_op_DTypes[i])) {
            *allow_legacy_promotion = NPY_FALSE;
            // TODO: A subclass of int, float, complex could reach here and
            //       it should not be flagged as "weak" if it does.
        }
        if (PyArray_NDIM(out_op[i]) == 0) {
            any_scalar = NPY_TRUE;
        }
        else {
            all_scalar = NPY_FALSE;
            continue;
        }

        // TODO: Is this equivalent/better by removing the logic which enforces
        //       that we always use weak promotion in the core?
        if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION) {
            continue;  /* Skip use of special dtypes */
        }

        /*
         * Handle the "weak" Python scalars/literals.  We use a special DType
         * for these.
         * Further, we mark the operation array with a special flag to indicate
         * this.  This is because the legacy dtype resolution makes use of
         * `np.can_cast(operand, dtype)`.  The flag is local to this use, but
         * necessary to propagate the information to the legacy type resolution.
         */
        if (npy_mark_tmp_array_if_pyscalar(obj, out_op[i], &out_op_DTypes[i])) {
            if (PyArray_FLAGS(out_op[i]) & NPY_ARRAY_WAS_PYTHON_INT
                    && PyArray_TYPE(out_op[i]) != NPY_LONG) {
                /*
                 * When `np.array(integer)` is not the default integer (mainly
                 * object dtype), this confuses many type resolvers.  Simply
                 * forcing a default integer array is unfortunately easiest.
                 * This disables the optional NEP 50 warnings, but in
                 * practice when this happens we should _usually_ pick the
                 * default integer loop and that raises an error.
                 * (An exception is `float64(1.) + 10**100` which silently
                 * will give a float64 result rather than a Python float.)
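                 *
                 * Illustrative NEP 50 behavior (added for clarity): under
                 * weak promotion `np.float32(1.0) + 2.0` stays float32,
                 * because the Python float does not force a float64 result
                 * the way a float64 array operand would.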
                 *
                 * TODO: Just like the general dual NEP 50/legacy promotion
                 *       support this is meant as a temporary hack for
                 *       NumPy 1.25.
                 */
                static PyArrayObject *zero_arr = NULL;
                if (NPY_UNLIKELY(zero_arr == NULL)) {
                    zero_arr = (PyArrayObject *)PyArray_ZEROS(
                            0, NULL, NPY_LONG, NPY_FALSE);
                    if (zero_arr == NULL) {
                        goto fail;
                    }
                    ((PyArrayObject_fields *)zero_arr)->flags |= (
                            NPY_ARRAY_WAS_PYTHON_INT|NPY_ARRAY_WAS_INT_AND_REPLACED);
                }
                Py_INCREF(zero_arr);
                Py_SETREF(out_op[i], zero_arr);
            }
            *promoting_pyscalars = NPY_TRUE;
        }
    }
    if (*allow_legacy_promotion && (!all_scalar && any_scalar)) {
        *force_legacy_promotion = should_use_min_scalar(nin, out_op, 0, NULL);
    }

    /* Convert and fill in output arguments */
    memset(out_op_DTypes + nin, 0, nout * sizeof(*out_op_DTypes));
    if (full_args.out != NULL) {
        for (int i = 0; i < nout; i++) {
            obj = PyTuple_GET_ITEM(full_args.out, i);
            if (_set_out_array(obj, out_op + i + nin) < 0) {
                goto fail;
            }
            if (out_op[i + nin] != NULL) {
                out_op_DTypes[i + nin] = NPY_DTYPE(
                        PyArray_DESCR(out_op[i + nin]));
                Py_INCREF(out_op_DTypes[i + nin]);
            }
        }
    }

    /*
     * Convert most arguments manually here, since it is easier to handle
     * the ufunc override if we first parse only to objects.
     */
    if (where_obj && !_wheremask_converter(where_obj, out_wheremask)) {
        goto fail;
    }
    if (keepdims_obj && !_keepdims_converter(keepdims_obj, out_keepdims)) {
        goto fail;
    }
    if (casting_obj && !PyArray_CastingConverter(casting_obj, out_casting)) {
        goto fail;
    }
    if (order_obj && !PyArray_OrderConverter(order_obj, out_order)) {
        goto fail;
    }
    if (subok_obj && !_subok_converter(subok_obj, out_subok)) {
        goto fail;
    }
    return 0;

fail:
    if (out_wheremask != NULL) {
        Py_XSETREF(*out_wheremask, NULL);
    }
    for (int i = 0; i < nop; i++) {
        Py_XSETREF(out_op[i], NULL);
    }
    return -1;
}

/*
 * This checks whether a trivial loop is ok,
 * making copies of scalar and one dimensional operands if that will
 * help.
 *
 * Returns 1 if a trivial loop is ok, 0 if it is not, and
 * -1 if there is an error.
 */
static int
check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
        PyArrayObject **op, PyArray_Descr **dtypes,
        NPY_CASTING casting, npy_intp buffersize)
{
    int force_cast_input = ufuncimpl->flags & _NPY_METH_FORCE_CAST_INPUTS;
    int i, nin = ufuncimpl->nin, nop = nin + ufuncimpl->nout;

    for (i = 0; i < nop; ++i) {
        /*
         * If the dtype doesn't match, or the array isn't aligned,
         * indicate that the trivial loop can't be done.
         */
        if (op[i] == NULL) {
            continue;
        }
        int must_copy = !PyArray_ISALIGNED(op[i]);

        if (dtypes[i] != PyArray_DESCR(op[i])) {
            npy_intp view_offset;
            NPY_CASTING safety = PyArray_GetCastInfo(
                    PyArray_DESCR(op[i]), dtypes[i], NULL, &view_offset);
            if (safety < 0 && PyErr_Occurred()) {
                /* A proper error during a cast check, should be rare */
                return -1;
            }
            if (view_offset != 0) {
                /* NOTE: Could possibly implement non-zero view offsets */
                must_copy = 1;
            }

            if (force_cast_input && i < nin) {
                /*
                 * ArrayMethod flagged to ignore casting (logical funcs
                 * can force cast to bool)
                 */
            }
            else if (PyArray_MinCastSafety(safety, casting) != casting) {
                return 0;  /* the cast is not safe enough */
            }
        }
        if (must_copy) {
            /*
             * If op[i] is a scalar or small one dimensional
             * array input, make a copy to keep the opportunity
             * for a trivial loop.  Outputs are not copied here.
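             *
             * For example (added for illustration): adding a float32 0-d
             * array to a large float64 array casts the scalar operand up
             * front, so the fast single inner-loop call can still be used.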
             */
            if (i < nin && (PyArray_NDIM(op[i]) == 0
                            || (PyArray_NDIM(op[i]) == 1
                                && PyArray_DIM(op[i], 0) <= buffersize))) {
                PyArrayObject *tmp;
                Py_INCREF(dtypes[i]);
                tmp = (PyArrayObject *)PyArray_CastToType(op[i], dtypes[i], 0);
                if (tmp == NULL) {
                    return -1;
                }
                Py_DECREF(op[i]);
                op[i] = tmp;
            }
            else {
                return 0;
            }
        }
    }

    return 1;
}

/*
 * Calls the given __array_prepare__ function on the operand *op,
 * substituting it in place if a new array is returned and matches
 * the old one.
 *
 * This requires that the dimensions, strides and data type remain
 * exactly the same, which may be more strict than before.
 */
static int
prepare_ufunc_output(PyUFuncObject *ufunc,
                     PyArrayObject **op,
                     PyObject *arr_prep,
                     ufunc_full_args full_args,
                     int i)
{
    if (arr_prep != NULL && arr_prep != Py_None) {
        PyObject *res;
        PyArrayObject *arr;
        PyObject *args_tup;

        /* Call with the context argument */
        args_tup = _get_wrap_prepare_args(full_args);
        if (args_tup == NULL) {
            return -1;
        }
        res = PyObject_CallFunction(
                arr_prep, "O(OOi)", *op, ufunc, args_tup, i);
        Py_DECREF(args_tup);

        if (res == NULL) {
            return -1;
        }
        else if (!PyArray_Check(res)) {
            PyErr_SetString(PyExc_TypeError,
                            "__array_prepare__ must return an "
                            "ndarray or subclass thereof");
            Py_DECREF(res);
            return -1;
        }
        arr = (PyArrayObject *)res;

        /* If the same object was returned, nothing to do */
        if (arr == *op) {
            Py_DECREF(arr);
        }
        /* If the result doesn't match, throw an error */
        else if (PyArray_NDIM(arr) != PyArray_NDIM(*op) ||
                 !PyArray_CompareLists(PyArray_DIMS(arr),
                                       PyArray_DIMS(*op),
                                       PyArray_NDIM(arr)) ||
                 !PyArray_CompareLists(PyArray_STRIDES(arr),
                                       PyArray_STRIDES(*op),
                                       PyArray_NDIM(arr)) ||
                 !PyArray_EquivTypes(PyArray_DESCR(arr),
                                     PyArray_DESCR(*op))) {
            PyErr_SetString(PyExc_TypeError,
                            "__array_prepare__ must return an "
                            "ndarray or subclass thereof which is "
                            "otherwise identical to its input");
            Py_DECREF(arr);
            return -1;
        }
        /* Replace the op value */
        else {
            Py_DECREF(*op);
            *op = arr;
        }
    }

    return 0;
}

/*
 * Check whether a trivial loop is possible and call the innerloop if it is.
 * A trivial loop is defined as one where a single strided inner-loop call
 * is possible.
 *
 * This function only supports a single output (due to the overlap check).
 * It always accepts 0-D arrays and will broadcast them.  The function
 * cannot broadcast any other array (as it requires a single stride).
 * The function accepts all 1-D arrays, and N-D arrays that are either all
 * C- or all F-contiguous.
 * NOTE: Broadcast outputs are implicitly rejected in the overlap detection.
 *
 * Returns -2 if a trivial loop is not possible, 0 on success and -1 on error.
 */
static int
try_trivial_single_output_loop(PyArrayMethod_Context *context,
        PyArrayObject *op[], NPY_ORDER order,
        PyObject *arr_prep[], ufunc_full_args full_args,
        int errormask, PyObject *extobj)
{
    int nin = context->method->nin;
    int nop = nin + 1;
    assert(context->method->nout == 1);

    /* The order of all N-D contiguous operands, can be fixed by `order` */
    int operation_order = 0;
    if (order == NPY_CORDER) {
        operation_order = NPY_ARRAY_C_CONTIGUOUS;
    }
    else if (order == NPY_FORTRANORDER) {
        operation_order = NPY_ARRAY_F_CONTIGUOUS;
    }

    int operation_ndim = 0;
    npy_intp *operation_shape = NULL;
    npy_intp fixed_strides[NPY_MAXARGS];

    for (int iop = 0; iop < nop; iop++) {
        if (op[iop] == NULL) {
            /* The out argument may be NULL (and only that one); fill later */
            assert(iop == nin);
            continue;
        }

        int op_ndim = PyArray_NDIM(op[iop]);

        /* Special case 0-D since we can handle broadcasting using a 0-stride */
        if (op_ndim == 0 && iop < nin) {
            fixed_strides[iop] = 0;
            continue;
        }

        /* First non 0-D op: fix dimensions, shape (order is fixed later) */
        if (operation_ndim == 0) {
            operation_ndim = op_ndim;
            operation_shape = PyArray_SHAPE(op[iop]);
        }
        else if (op_ndim != operation_ndim) {
            return -2;  /* dimension mismatch (except 0-d input ops) */
        }
        else if (!PyArray_CompareLists(
                operation_shape, PyArray_DIMS(op[iop]), op_ndim)) {
            return -2;  /* shape mismatch */
        }

        if (op_ndim == 1) {
            fixed_strides[iop] = PyArray_STRIDES(op[iop])[0];
        }
        else {
            fixed_strides[iop] = PyArray_ITEMSIZE(op[iop]);  /* contiguous */

            /* This op must match the operation order (and be contiguous) */
            int op_order = (PyArray_FLAGS(op[iop]) &
                            (NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_F_CONTIGUOUS));
            if (op_order == 0) {
                return -2;  /* N-dimensional op must be contiguous */
            }
            else if (operation_order == 0) {
                operation_order = op_order;  /* op fixes order */
            }
            else if (operation_order != op_order) {
                return -2;
            }
        }
    }

    if (op[nin] == NULL) {
        Py_INCREF(context->descriptors[nin]);
        op[nin] = (PyArrayObject *) PyArray_NewFromDescr(&PyArray_Type,
                context->descriptors[nin], operation_ndim, operation_shape,
                NULL, NULL, operation_order==NPY_ARRAY_F_CONTIGUOUS, NULL);
        if (op[nin] == NULL) {
            return -1;
        }
        fixed_strides[nin] = context->descriptors[nin]->elsize;
    }
    else {
        /* If any input overlaps with the output, we use the full path. */
        for (int iop = 0; iop < nin; iop++) {
            if (!PyArray_EQUIVALENTLY_ITERABLE_OVERLAP_OK(
                    op[iop], op[nin],
                    PyArray_TRIVIALLY_ITERABLE_OP_READ,
                    PyArray_TRIVIALLY_ITERABLE_OP_NOREAD)) {
                return -2;
            }
        }
        /* Check self-overlap (non 1-D are contiguous, perfect overlap is OK) */
        if (operation_ndim == 1 &&
                PyArray_STRIDES(op[nin])[0] < PyArray_ITEMSIZE(op[nin]) &&
                PyArray_STRIDES(op[nin])[0] != 0) {
            return -2;
        }
    }

    /* Call __array_prepare__ if necessary */
    if (prepare_ufunc_output((PyUFuncObject *)context->caller,
            &op[nin], arr_prep[0], full_args, 0) < 0) {
        return -1;
    }

    /*
     * We can use the trivial (single inner-loop call) optimization
     * and `fixed_strides` holds the strides for that call.
     */
    char *data[NPY_MAXARGS];
    npy_intp count = PyArray_MultiplyList(operation_shape, operation_ndim);
    if (count == 0) {
        /* Nothing to do */
        return 0;
    }
    NPY_BEGIN_THREADS_DEF;

    PyArrayMethod_StridedLoop *strided_loop;
    NpyAuxData *auxdata = NULL;
    NPY_ARRAYMETHOD_FLAGS flags = 0;
    if (context->method->get_strided_loop(context, 1, 0, fixed_strides,
            &strided_loop, &auxdata, &flags) < 0) {
        return -1;
    }
    for (int iop = 0; iop < nop; iop++) {
        data[iop] = PyArray_BYTES(op[iop]);
    }

    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        npy_clear_floatstatus_barrier((char *)context);
    }
    if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
        NPY_BEGIN_THREADS_THRESHOLDED(count);
    }

    int res = strided_loop(context, data, &count, fixed_strides, auxdata);

    NPY_END_THREADS;
    NPY_AUXDATA_FREE(auxdata);

    /*
     * An error should only be possible if `res != 0` is already set.
     * But this is not strictly correct for old-style ufuncs (e.g. `power`
     * released the GIL but manually set an Exception).
     */
    if (PyErr_Occurred()) {
        res = -1;
    }

    if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        /* NOTE: We could check float errors even when `res < 0` */
        const char *name = ufunc_get_name_cstr((PyUFuncObject *)context->caller);
        res = _check_ufunc_fperr(errormask, extobj, name);
    }
    return res;
}

/*
 * Check casting: It would be nice to just move this into the iterator
 * or pass in the full cast information.  But this can special case
 * the logical functions and prints a better error message.
 */
static inline int
validate_casting(PyArrayMethodObject *method, PyUFuncObject *ufunc,
        PyArrayObject *ops[], PyArray_Descr *descriptors[],
        NPY_CASTING casting)
{
    if (method->resolve_descriptors == &wrapped_legacy_resolve_descriptors) {
        /*
         * In this case the legacy type resolution was definitely called
         * and we do not need to check (astropy/pyerfa relied on this).
         */
        return 0;
    }
    if (method->flags & _NPY_METH_FORCE_CAST_INPUTS) {
        if (PyUFunc_ValidateOutCasting(ufunc, casting, ops, descriptors) < 0) {
            return -1;
        }
    }
    else {
        if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) {
            return -1;
        }
    }
    return 0;
}

/*
 * The ufunc loop implementation for both normal ufunc calls and masked calls
 * when the iterator has to be used.
 *
 * See `PyUFunc_GenericFunctionInternal` for more information (where this is
 * called from).
 */
static int
execute_ufunc_loop(PyArrayMethod_Context *context, int masked,
        PyArrayObject **op, NPY_ORDER order, npy_intp buffersize,
        NPY_CASTING casting,
        PyObject **arr_prep, ufunc_full_args full_args,
        npy_uint32 *op_flags, int errormask, PyObject *extobj)
{
    PyUFuncObject *ufunc = (PyUFuncObject *)context->caller;
    int nin = context->method->nin, nout = context->method->nout;
    int nop = nin + nout;

    if (validate_casting(context->method,
            ufunc, op, context->descriptors, casting) < 0) {
        return -1;
    }

    if (masked) {
        assert(PyArray_TYPE(op[nop]) == NPY_BOOL);
        if (ufunc->_always_null_previously_masked_innerloop_selector != NULL) {
            if (PyErr_WarnFormat(PyExc_UserWarning, 1,
                    "The ufunc %s has a custom masked-inner-loop-selector. "
                    "NumPy assumes that this is NEVER used. If you do make "
                    "use of this please notify the NumPy developers to discuss "
                    "future solutions. (See NEP 41 and 43)\n"
                    "NumPy will continue, but ignore the custom loop selector. "
                    "This should only affect performance.",
                    ufunc_get_name_cstr(ufunc)) < 0) {
                return -1;
            }
        }

        /*
         * NOTE: In the masked version, we consider the output read-write,
         *       this gives a best-effort of preserving the input, but does
         *       not always work.
         *       It could allow the operand to be copied
         *       due to copy-if-overlap, but only if it was passed in.
         *       In that case `__array_prepare__` is called before it happens.
         */
        for (int i = nin; i < nop; ++i) {
            op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE
                                          : NPY_ITER_WRITEONLY);
        }
        op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK;  /* mask */
    }

    NPY_UF_DBG_PRINT("Making iterator\n");

    npy_uint32 iter_flags = ufunc->iter_flags |
                 NPY_ITER_EXTERNAL_LOOP |
                 NPY_ITER_REFS_OK |
                 NPY_ITER_ZEROSIZE_OK |
                 NPY_ITER_BUFFERED |
                 NPY_ITER_GROWINNER |
                 NPY_ITER_DELAY_BUFALLOC |
                 NPY_ITER_COPY_IF_OVERLAP;

    /*
     * Call the __array_prepare__ functions for already existing output arrays.
     * Do this before creating the iterator, as the iterator may UPDATEIFCOPY
     * some of them.
     */
    for (int i = 0; i < nout; i++) {
        if (op[nin+i] == NULL) {
            continue;
        }
        if (prepare_ufunc_output(ufunc, &op[nin+i],
                                 arr_prep[i], full_args, i) < 0) {
            return -1;
        }
    }

    /*
     * Allocate the iterator.  Because the types of the inputs
     * were already checked, we use the casting rule 'unsafe' which
     * is faster to calculate.
     */
    NpyIter *iter = NpyIter_AdvancedNew(nop + masked, op,
                        iter_flags,
                        order, NPY_UNSAFE_CASTING,
                        op_flags, context->descriptors,
                        -1, NULL, NULL, buffersize);
    if (iter == NULL) {
        return -1;
    }

    NPY_UF_DBG_PRINT("Made iterator\n");

    /* Call the __array_prepare__ functions for newly allocated arrays */
    PyArrayObject **op_it = NpyIter_GetOperandArray(iter);
    char *baseptrs[NPY_MAXARGS];

    for (int i = 0; i < nout; ++i) {
        if (op[nin + i] == NULL) {
            op[nin + i] = op_it[nin + i];
            Py_INCREF(op[nin + i]);

            /* Call the __array_prepare__ functions for the new array */
            if (prepare_ufunc_output(ufunc, &op[nin + i],
                                     arr_prep[i], full_args, i) < 0) {
                NpyIter_Deallocate(iter);
                return -1;
            }

            /*
             * In case __array_prepare__ returned a different array, put the
             * results directly there, ignoring the array allocated by the
             * iterator.
             *
             * Here, we assume the user-provided __array_prepare__ behaves
             * sensibly and doesn't return an array overlapping in memory
             * with other operands --- the op[nin+i] array passed to it is newly
             * allocated and doesn't have any overlap.
             */
            baseptrs[nin + i] = PyArray_BYTES(op[nin + i]);
        }
        else {
            baseptrs[nin + i] = PyArray_BYTES(op_it[nin + i]);
        }
    }
    /* Only do the loop if the iteration size is non-zero */
    npy_intp full_size = NpyIter_GetIterSize(iter);
    if (full_size == 0) {
        if (!NpyIter_Deallocate(iter)) {
            return -1;
        }
        return 0;
    }

    /*
     * Reset the iterator with the base pointers possibly modified by
     * `__array_prepare__`.
     */
    for (int i = 0; i < nin; i++) {
        baseptrs[i] = PyArray_BYTES(op_it[i]);
    }
    if (masked) {
        baseptrs[nop] = PyArray_BYTES(op_it[nop]);
    }

    /*
     * Get the inner loop, with the possibility of specialization
     * based on the fixed strides.
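     *
     * For example (added for clarity), if all operands are contiguous
     * float64 arrays the fixed strides are all the 8-byte itemsize, which
     * lets a specialized (possibly SIMD) inner loop be chosen.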
     */
    PyArrayMethod_StridedLoop *strided_loop;
    NpyAuxData *auxdata;
    npy_intp fixed_strides[NPY_MAXARGS];
    NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
    NPY_ARRAYMETHOD_FLAGS flags = 0;
    if (masked) {
        if (PyArrayMethod_GetMaskedStridedLoop(context,
                1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
            NpyIter_Deallocate(iter);
            return -1;
        }
    }
    else {
        if (context->method->get_strided_loop(context,
                1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
            NpyIter_Deallocate(iter);
            return -1;
        }
    }

    /* Get the variables needed for the loop */
    NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
    if (iternext == NULL) {
        NPY_AUXDATA_FREE(auxdata);
        NpyIter_Deallocate(iter);
        return -1;
    }
    char **dataptr = NpyIter_GetDataPtrArray(iter);
    npy_intp *strides = NpyIter_GetInnerStrideArray(iter);
    npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter);

    NPY_BEGIN_THREADS_DEF;
    flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter));
    if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        npy_clear_floatstatus_barrier((char *)context);
    }
    if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
        NPY_BEGIN_THREADS_THRESHOLDED(full_size);
    }

    /* The reset may copy the first buffer chunk, which could cause FPEs */
    if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
        NPY_AUXDATA_FREE(auxdata);
        NpyIter_Deallocate(iter);
        return -1;
    }

    NPY_UF_DBG_PRINT("Actual inner loop:\n");
    /* Execute the loop */
    int res;
    do {
        NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)*countptr);
        res = strided_loop(context, dataptr, countptr, strides, auxdata);
    } while (res == 0 && iternext(iter));

    NPY_END_THREADS;
    NPY_AUXDATA_FREE(auxdata);

    if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
        /* NOTE: We could check float errors even when `res < 0` */
        const char *name = ufunc_get_name_cstr((PyUFuncObject *)context->caller);
        res = _check_ufunc_fperr(errormask, extobj, name);
    }

    if (!NpyIter_Deallocate(iter)) {
        return -1;
    }
    return res;
}

/*
 * Validate that operands have enough dimensions, accounting for
 * possible flexible dimensions that may be absent.
 */
static int
_validate_num_dims(PyUFuncObject *ufunc, PyArrayObject **op,
                   npy_uint32 *core_dim_flags,
                   int *op_core_num_dims) {
    int i, j;
    int nin = ufunc->nin;
    int nop = ufunc->nargs;

    for (i = 0; i < nop; i++) {
        if (op[i] != NULL) {
            int op_ndim = PyArray_NDIM(op[i]);

            if (op_ndim < op_core_num_dims[i]) {
                int core_offset = ufunc->core_offsets[i];
                /* We've too few, but some dimensions might be flexible */
                for (j = core_offset;
                     j < core_offset + ufunc->core_num_dims[i]; j++) {
                    int core_dim_index = ufunc->core_dim_ixs[j];
                    if ((core_dim_flags[core_dim_index] &
                         UFUNC_CORE_DIM_CAN_IGNORE)) {
                        int i1, j1, k;
                        /*
                         * Found a dimension that can be ignored. Flag that
                         * it is missing, and unflag that it can be ignored,
                         * since we are doing so already.
                         */
                        core_dim_flags[core_dim_index] |= UFUNC_CORE_DIM_MISSING;
                        core_dim_flags[core_dim_index] ^= UFUNC_CORE_DIM_CAN_IGNORE;
                        /*
                         * Reduce the number of core dimensions for all
                         * operands that use this one (including ours),
                         * and check whether we're now OK.
                         */
                        for (i1 = 0, k = 0; i1 < nop; i1++) {
                            for (j1 = 0; j1 < ufunc->core_num_dims[i1]; j1++) {
                                if (ufunc->core_dim_ixs[k++] == core_dim_index) {
                                    op_core_num_dims[i1]--;
                                }
                            }
                        }
                        if (op_ndim == op_core_num_dims[i]) {
                            break;
                        }
                    }
                }
                if (op_ndim < op_core_num_dims[i]) {
                    PyErr_Format(PyExc_ValueError,
                                 "%s: %s operand %d does not have enough "
                                 "dimensions (has %d, gufunc core with "
                                 "signature %s requires %d)",
                                 ufunc_get_name_cstr(ufunc),
                                 i < nin ? "Input" : "Output",
                                 i < nin ?
                                     i : i - nin,
                                 PyArray_NDIM(op[i]),
                                 ufunc->core_signature,
                                 op_core_num_dims[i]);
                    return -1;
                }
            }
        }
    }
    return 0;
}

/*
 * Check whether any of the outputs of a gufunc has core dimensions.
 */
static int
_has_output_coredims(PyUFuncObject *ufunc) {
    int i;
    for (i = ufunc->nin; i < ufunc->nin + ufunc->nout; ++i) {
        if (ufunc->core_num_dims[i] > 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Check whether the gufunc can be used with axis, i.e., that there is only
 * a single, shared core dimension (which means that operands either have
 * that dimension, or have no core dimensions).  Returns 0 if all is fine,
 * and sets an error and returns -1 if not.
 */
static int
_check_axis_support(PyUFuncObject *ufunc) {
    if (ufunc->core_num_dim_ix != 1) {
        PyErr_Format(PyExc_TypeError,
                     "%s: axis can only be used with a single shared core "
                     "dimension, not with the %d distinct ones implied by "
                     "signature %s.",
                     ufunc_get_name_cstr(ufunc),
                     ufunc->core_num_dim_ix,
                     ufunc->core_signature);
        return -1;
    }
    return 0;
}

/*
 * Check whether the gufunc can be used with keepdims, i.e., that all its
 * input arguments have the same number of core dimension, and all output
 * arguments have no core dimensions.  Returns 0 if all is fine, and sets
 * an error and returns -1 if not.
 */
static int
_check_keepdims_support(PyUFuncObject *ufunc) {
    int i;
    int nin = ufunc->nin, nout = ufunc->nout;
    int input_core_dims = ufunc->core_num_dims[0];
    for (i = 1; i < nin + nout; i++) {
        if (ufunc->core_num_dims[i] != (i < nin ? input_core_dims : 0)) {
            PyErr_Format(PyExc_TypeError,
                "%s does not support keepdims: its signature %s requires "
                "%s %d to have %d core dimensions, but keepdims can only "
                "be used when all inputs have the same number of core "
                "dimensions and all outputs have no core dimensions.",
                ufunc_get_name_cstr(ufunc),
                ufunc->core_signature,
                i < nin ? "input" : "output",
                i < nin ? i : i - nin,
                ufunc->core_num_dims[i]);
            return -1;
        }
    }
    return 0;
}

/*
 * Interpret a possible axes keyword argument, using it to fill the remap_axis
 * array which maps default to actual axes for each operand, indexed as
 * remap_axis[iop][iaxis]. The default axis order has first all broadcast
 * axes and then the core axes the gufunc operates on.
 *
 * Returns 0 on success, and -1 on failure
 */
static int
_parse_axes_arg(PyUFuncObject *ufunc, int op_core_num_dims[], PyObject *axes,
                PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
    static PyObject *AxisError_cls = NULL;

    int nin = ufunc->nin;
    int nop = ufunc->nargs;
    int iop, list_size;

    if (!PyList_Check(axes)) {
        PyErr_SetString(PyExc_TypeError, "axes should be a list.");
        return -1;
    }
    list_size = PyList_Size(axes);
    if (list_size != nop) {
        if (list_size != nin || _has_output_coredims(ufunc)) {
            PyErr_Format(PyExc_ValueError,
                         "axes should be a list with an entry for all "
                         "%d inputs and outputs; entries for outputs can only "
                         "be omitted if none of them has core axes.",
                         nop);
            return -1;
        }
        for (iop = nin; iop < nop; iop++) {
            remap_axis[iop] = NULL;
        }
    }
    for (iop = 0; iop < list_size; ++iop) {
        int op_ndim, op_ncore, op_nbroadcast;
        int have_seen_axis[NPY_MAXDIMS] = {0};
        PyObject *op_axes_tuple, *axis_item;
        int axis, op_axis;

        op_ncore = op_core_num_dims[iop];
        if (op[iop] != NULL) {
            op_ndim = PyArray_NDIM(op[iop]);
            op_nbroadcast = op_ndim - op_ncore;
        }
        else {
            op_nbroadcast = broadcast_ndim;
            op_ndim = broadcast_ndim + op_ncore;
        }
        /*
         * Get axes tuple for operand.  If not a tuple already, make it one if
         * there is only one axis (its content is checked later).
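         *
         * Illustrative Python-level use (added for clarity):
         * `np.matmul(a, b, axes=[(-2, -1), (-2, -1), (-2, -1)])` gives one
         * tuple per operand; a bare integer is accepted when an operand has
         * exactly one core dimension.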
         */
        op_axes_tuple = PyList_GET_ITEM(axes, iop);
        if (PyTuple_Check(op_axes_tuple)) {
            if (PyTuple_Size(op_axes_tuple) != op_ncore) {
                /* must have been a tuple with too many entries. */
                npy_cache_import(
                        "numpy.exceptions", "AxisError", &AxisError_cls);
                if (AxisError_cls == NULL) {
                    return -1;
                }
                PyErr_Format(AxisError_cls,
                        "%s: operand %d has %d core dimensions, "
                        "but %zd dimensions are specified by axes tuple.",
                        ufunc_get_name_cstr(ufunc), iop, op_ncore,
                        PyTuple_Size(op_axes_tuple));
                return -1;
            }
            Py_INCREF(op_axes_tuple);
        }
        else if (op_ncore == 1) {
            op_axes_tuple = PyTuple_Pack(1, op_axes_tuple);
            if (op_axes_tuple == NULL) {
                return -1;
            }
        }
        else {
            /* If input is not an integer tell user that a tuple is needed */
            if (error_converting(PyArray_PyIntAsInt(op_axes_tuple))) {
                PyErr_Format(PyExc_TypeError,
                        "%s: axes item %d should be a tuple.",
                        ufunc_get_name_cstr(ufunc), iop);
                return -1;
            }
            /* If it is a single integer, inform user that more are needed */
            npy_cache_import("numpy.exceptions", "AxisError", &AxisError_cls);
            if (AxisError_cls == NULL) {
                return -1;
            }
            PyErr_Format(AxisError_cls,
                    "%s: operand %d has %d core dimensions, "
                    "but the axes item is a single integer.",
                    ufunc_get_name_cstr(ufunc), iop, op_ncore);
            return -1;
        }
        /*
         * Now create the remap, starting with the core dimensions, and then
         * adding the remaining broadcast axes that are to be iterated over.
         */
        for (axis = op_nbroadcast; axis < op_ndim; axis++) {
            axis_item = PyTuple_GET_ITEM(op_axes_tuple, axis - op_nbroadcast);
            op_axis = PyArray_PyIntAsInt(axis_item);
            if (error_converting(op_axis) ||
                    (check_and_adjust_axis(&op_axis, op_ndim) < 0)) {
                Py_DECREF(op_axes_tuple);
                return -1;
            }
            if (have_seen_axis[op_axis]) {
                PyErr_Format(PyExc_ValueError,
                             "axes item %d has value %d repeated",
                             iop, op_axis);
                Py_DECREF(op_axes_tuple);
                return -1;
            }
            have_seen_axis[op_axis] = 1;
            remap_axis[iop][axis] = op_axis;
        }
        Py_DECREF(op_axes_tuple);
        /*
         * Fill the op_nbroadcast=op_ndim-op_ncore axes not yet set,
         * using have_seen_axis to skip over entries set above.
         */
        for (axis = 0, op_axis = 0; axis < op_nbroadcast; axis++) {
            while (have_seen_axis[op_axis]) {
                op_axis++;
            }
            remap_axis[iop][axis] = op_axis++;
        }
        /*
         * Check whether we are actually remapping anything.  Here,
         * op_axis can only equal axis if all broadcast axes were the same
         * (i.e., the while loop above was never entered).
         */
        if (axis == op_axis) {
            while (axis < op_ndim && remap_axis[iop][axis] == axis) {
                axis++;
            }
        }
        if (axis == op_ndim) {
            remap_axis[iop] = NULL;
        }
    } /* end of for(iop) loop over operands */
    return 0;
}

/*
 * Simplified version of the above, using axis to fill the remap_axis
 * array, which maps default to actual axes for each operand, indexed as
 * remap_axis[iop][iaxis]. The default axis order has first all broadcast
 * axes and then the core axes the gufunc operates on.
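 *
 * Illustrative Python-level use (added for clarity): for a gufunc with
 * signature "(i),(i)->()", `axis=0` applies the core dimension to axis 0
 * of both inputs instead of the default last axis.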
 *
 * Returns 0 on success, and -1 on failure
 */
static int
_parse_axis_arg(PyUFuncObject *ufunc, const int core_num_dims[], PyObject *axis,
                PyArrayObject **op, int broadcast_ndim, int **remap_axis) {
    int nop = ufunc->nargs;
    int iop, axis_int;

    axis_int = PyArray_PyIntAsInt(axis);
    if (error_converting(axis_int)) {
        return -1;
    }

    for (iop = 0; iop < nop; ++iop) {
        int axis, op_ndim, op_axis;

        /* _check_axis_support ensures core_num_dims is 0 or 1 */
        if (core_num_dims[iop] == 0) {
            remap_axis[iop] = NULL;
            continue;
        }
        if (op[iop]) {
            op_ndim = PyArray_NDIM(op[iop]);
        }
        else {
            op_ndim = broadcast_ndim + 1;
        }
        op_axis = axis_int;  /* ensure we don't modify axis_int */
        if (check_and_adjust_axis(&op_axis, op_ndim) < 0) {
            return -1;
        }
        /* Are we actually remapping away from last axis? */
        if (op_axis == op_ndim - 1) {
            remap_axis[iop] = NULL;
            continue;
        }
        remap_axis[iop][op_ndim - 1] = op_axis;
        for (axis = 0; axis < op_axis; axis++) {
            remap_axis[iop][axis] = axis;
        }
        for (axis = op_axis; axis < op_ndim - 1; axis++) {
            remap_axis[iop][axis] = axis + 1;
        }
    } /* end of for(iop) loop over operands */
    return 0;
}

#define REMAP_AXIS(iop, axis) ((remap_axis != NULL && \
                                remap_axis[iop] != NULL)? \
                               remap_axis[iop][axis] : axis)

/*
 * Validate the core dimensions of all the operands, and collect all of
 * the labelled core dimensions into 'core_dim_sizes'.
 *
 * Returns 0 on success, and -1 on failure
 *
 * The behavior has been changed in NumPy 1.16.0, and the following
 * requirements must be fulfilled or an error will be raised:
 * * Arguments, both input and output, must have at least as many
 *   dimensions as the corresponding number of core dimensions. In
 *   versions before 1.10, 1's were prepended to the shape as needed.
 * * Core dimensions with same labels must have exactly matching sizes.
 *   In versions before 1.10, core dimensions of size 1 would broadcast
 *   against other core dimensions with the same label.
 * * All core dimensions must have their size specified by a passed in
 *   input or output argument. In versions before 1.10, core dimensions in
 *   an output argument that were not specified in an input argument,
 *   and whose size could not be inferred from a passed in output
 *   argument, would have their size set to 1.
 * * Core dimensions may be fixed, new in NumPy 1.16
 */
static int
_get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op,
                   const int *op_core_num_dims, npy_uint32 *core_dim_flags,
                   npy_intp *core_dim_sizes, int **remap_axis) {
    int i;
    int nin = ufunc->nin;
    int nout = ufunc->nout;
    int nop = nin + nout;

    for (i = 0; i < nop; ++i) {
        if (op[i] != NULL) {
            int idim;
            int dim_offset = ufunc->core_offsets[i];
            int core_start_dim = PyArray_NDIM(op[i]) - op_core_num_dims[i];
            int dim_delta = 0;

            /* checked before this routine gets called */
            assert(core_start_dim >= 0);

            /*
             * Make sure every core dimension exactly matches all other core
             * dimensions with the same label.  Note that flexible dimensions
             * may have been removed at this point, if so, they are marked
             * with UFUNC_CORE_DIM_MISSING.
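             *
             * For example (added for illustration), with the matmul-style
             * signature "(m,n),(n,p)->(m,p)", operands of shape (2, 3) and
             * (4, 5) fail here because the shared label n resolves to both
             * 3 and 4.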
             */
            for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
                int core_index = dim_offset + idim;
                int core_dim_index = ufunc->core_dim_ixs[core_index];
                npy_intp core_dim_size = core_dim_sizes[core_dim_index];
                npy_intp op_dim_size;

                /* can only happen if flexible; dimension missing altogether */
                if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) {
                    op_dim_size = 1;
                    dim_delta++;  /* for indexing in dimensions */
                }
                else {
                    op_dim_size = PyArray_DIM(op[i],
                            REMAP_AXIS(i, core_start_dim + idim - dim_delta));
                }
                if (core_dim_sizes[core_dim_index] < 0) {
                    core_dim_sizes[core_dim_index] = op_dim_size;
                }
                else if (op_dim_size != core_dim_size) {
                    PyErr_Format(PyExc_ValueError,
                            "%s: %s operand %d has a mismatch in its "
                            "core dimension %d, with gufunc "
                            "signature %s (size %zd is different "
                            "from %zd)",
                            ufunc_get_name_cstr(ufunc),
                            i < nin ? "Input" : "Output",
                            i < nin ? i : i - nin,
                            idim - dim_delta,
                            ufunc->core_signature,
                            op_dim_size,
                            core_dim_sizes[core_dim_index]);
                    return -1;
                }
            }
        }
    }

    /*
     * Make sure no core dimension is unspecified.
     */
    for (i = nin; i < nop; ++i) {
        int idim;
        int dim_offset = ufunc->core_offsets[i];

        for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
            int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];

            /* check all cases where the size has not yet been set */
            if (core_dim_sizes[core_dim_index] < 0) {
                /*
                 * Oops, this dimension was never specified
                 * (can only happen if output op not given)
                 */
                PyErr_Format(PyExc_ValueError,
                        "%s: Output operand %d has core dimension %d "
                        "unspecified, with gufunc signature %s",
                        ufunc_get_name_cstr(ufunc),
                        i - nin, idim, ufunc->core_signature);
                return -1;
            }
        }
    }
    return 0;
}

/*
 * Returns a new reference to the ufunc identity.  Note that this identity
 * is only a default identity value stored on the ufunc, since the individual
 * ufunc loop (ArrayMethod) is queried for the actual identity.
 *
 * TODO: store a reference in the ufunc object itself, rather than
 *       constructing one each time
 */
NPY_NO_EXPORT PyObject *
PyUFunc_GetDefaultIdentity(PyUFuncObject *ufunc, npy_bool *reorderable)
{
    switch(ufunc->identity) {
    case PyUFunc_One:
        *reorderable = 1;
        return PyLong_FromLong(1);

    case PyUFunc_Zero:
        *reorderable = 1;
        return PyLong_FromLong(0);

    case PyUFunc_MinusOne:
        *reorderable = 1;
        return PyLong_FromLong(-1);

    case PyUFunc_ReorderableNone:
        *reorderable = 1;
        Py_RETURN_NONE;

    case PyUFunc_None:
        *reorderable = 0;
        Py_RETURN_NONE;

    case PyUFunc_IdentityValue:
        *reorderable = 1;
        Py_INCREF(ufunc->identity_value);
        return ufunc->identity_value;

    default:
        PyErr_Format(PyExc_ValueError,
                "ufunc %s has an invalid identity",
                ufunc_get_name_cstr(ufunc));
        return NULL;
    }
}

/*
 * Copy over parts of the ufunc structure that may need to be
 * changed during execution.  Returns 0 on success; -1 otherwise.
 */
static int
_initialize_variable_parts(PyUFuncObject *ufunc,
                           int op_core_num_dims[],
                           npy_intp core_dim_sizes[],
                           npy_uint32 core_dim_flags[]) {
    int i;

    for (i = 0; i < ufunc->nargs; i++) {
        op_core_num_dims[i] = ufunc->core_num_dims[i];
    }
    for (i = 0; i < ufunc->core_num_dim_ix; i++) {
        core_dim_sizes[i] = ufunc->core_dim_sizes[i];
        core_dim_flags[i] = ufunc->core_dim_flags[i];
    }
    return 0;
}

static int
PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
        PyArrayMethodObject *ufuncimpl, PyArray_Descr *operation_descrs[],
        PyArrayObject *op[], PyObject *extobj,
        NPY_CASTING casting, NPY_ORDER order,
        PyObject *axis, PyObject *axes, int keepdims)
{
    int nin, nout;
    int i, j, idim, nop;
    const char *ufunc_name;
    int retval;
    int needs_api = 0;

    /* Use remapped axes for generalized ufunc */
    int broadcast_ndim, iter_ndim;
    int op_core_num_dims[NPY_MAXARGS];
    int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
    int *op_axes[NPY_MAXARGS];
    npy_uint32 core_dim_flags[NPY_MAXARGS];

    npy_uint32 op_flags[NPY_MAXARGS];
    npy_intp iter_shape[NPY_MAXARGS];
    NpyIter *iter = NULL;
    npy_uint32 iter_flags;
    npy_intp total_problem_size;

    /* These parameters come from extobj= or from a TLS global */
    int buffersize = 0, errormask = 0;

    /* The dimensions which get passed to the inner loop */
    npy_intp inner_dimensions[NPY_MAXDIMS+1];
    /* The strides which get passed to the inner loop */
    npy_intp *inner_strides = NULL;
    /* Auxiliary data allocated by the ufuncimpl (ArrayMethod) */
    NpyAuxData *auxdata = NULL;

    /* The sizes of the core dimensions (# entries is ufunc->core_num_dim_ix) */
    npy_intp *core_dim_sizes = inner_dimensions + 1;
    int core_dim_ixs_size;
    /* swapping around of axes */
    int *remap_axis_memory = NULL;
    int **remap_axis = NULL;

    nin = ufunc->nin;
    nout = ufunc->nout;
    nop = nin + nout;

    ufunc_name = ufunc_get_name_cstr(ufunc);

    NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name);

    if (validate_casting(ufuncimpl,
            ufunc, op, operation_descrs, casting) < 0) {
        return -1;
    }

    /* Initialize possibly variable parts to the values from the ufunc */
    retval = _initialize_variable_parts(ufunc, op_core_num_dims,
                                        core_dim_sizes, core_dim_flags);
    if (retval < 0) {
        goto fail;
    }

    /*
     * If keepdims was passed in (and thus changed from the initial value
     * on top), check the gufunc is suitable, i.e., that its inputs share
     * the same number of core dimensions, and its outputs have none.
     */
    if (keepdims != -1) {
        retval = _check_keepdims_support(ufunc);
        if (retval < 0) {
            goto fail;
        }
    }
    if (axis != NULL) {
        retval = _check_axis_support(ufunc);
        if (retval < 0) {
            goto fail;
        }
    }
    /*
     * If keepdims is set and true, which means all input dimensions are
     * the same, signal that all output dimensions will be the same too.
     */
    if (keepdims == 1) {
        int num_dims = op_core_num_dims[0];
        for (i = nin; i < nop; ++i) {
            op_core_num_dims[i] = num_dims;
        }
    }
    else {
        /* keepdims was not set or was false; no adjustment necessary */
        keepdims = 0;
    }
    /*
     * Check that operands have the minimum dimensions required.
     * (Just checks core; broadcast dimensions are tested by the iterator.)
     */
    retval = _validate_num_dims(ufunc, op, core_dim_flags,
                                op_core_num_dims);
    if (retval < 0) {
        goto fail;
    }
    /*
     * Figure out the number of iteration dimensions, which
     * is the broadcast result of all the non-core dimensions.
     * (We do allow outputs to broadcast inputs currently, if they are given.
     * This is in line with what normal ufuncs do.)
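     *
     * For example (added for illustration), a matmul-style
     * "(m,n),(n,p)->(m,p)" call with operands of shape (10, 2, 3) and
     * (3, 4) has one non-core dimension, so broadcast_ndim is 1.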
*/ broadcast_ndim = 0; for (i = 0; i < nop; ++i) { if (op[i] == NULL) { continue; } int n = PyArray_NDIM(op[i]) - op_core_num_dims[i]; if (n > broadcast_ndim) { broadcast_ndim = n; } } /* Possibly remap axes. */ if (axes != NULL || axis != NULL) { assert(!(axes != NULL && axis != NULL)); remap_axis = PyArray_malloc(sizeof(remap_axis[0]) * nop); remap_axis_memory = PyArray_malloc(sizeof(remap_axis_memory[0]) * nop * NPY_MAXDIMS); if (remap_axis == NULL || remap_axis_memory == NULL) { PyErr_NoMemory(); goto fail; } for (i=0; i < nop; i++) { remap_axis[i] = remap_axis_memory + i * NPY_MAXDIMS; } if (axis) { retval = _parse_axis_arg(ufunc, op_core_num_dims, axis, op, broadcast_ndim, remap_axis); } else { retval = _parse_axes_arg(ufunc, op_core_num_dims, axes, op, broadcast_ndim, remap_axis); } if(retval < 0) { goto fail; } } /* Collect the lengths of the labelled core dimensions */ retval = _get_coredim_sizes(ufunc, op, op_core_num_dims, core_dim_flags, core_dim_sizes, remap_axis); if(retval < 0) { goto fail; } /* * Figure out the number of iterator creation dimensions, * which is the broadcast dimensions + all the core dimensions of * the outputs, so that the iterator can allocate those output * dimensions following the rules of order='F', for example. */ iter_ndim = broadcast_ndim; for (i = nin; i < nop; ++i) { iter_ndim += op_core_num_dims[i]; } if (iter_ndim > NPY_MAXDIMS) { PyErr_Format(PyExc_ValueError, "too many dimensions for generalized ufunc %s", ufunc_name); retval = -1; goto fail; } /* Fill in the initial part of 'iter_shape' */ for (idim = 0; idim < broadcast_ndim; ++idim) { iter_shape[idim] = -1; } /* Fill in op_axes for all the operands */ j = broadcast_ndim; for (i = 0; i < nop; ++i) { int n; if (op[i]) { n = PyArray_NDIM(op[i]) - op_core_num_dims[i]; } else { n = broadcast_ndim; } /* Broadcast all the unspecified dimensions normally */ for (idim = 0; idim < broadcast_ndim; ++idim) { if (idim >= broadcast_ndim - n) { op_axes_arrays[i][idim] = REMAP_AXIS(i, idim - (broadcast_ndim - n)); } else { op_axes_arrays[i][idim] = -1; } } /* * Any output core dimensions shape should be ignored, so we add * it as a Reduce dimension (which can be broadcast with the rest). * These will be removed before the actual iteration for gufuncs. */ for (idim = broadcast_ndim; idim < iter_ndim; ++idim) { op_axes_arrays[i][idim] = NPY_ITER_REDUCTION_AXIS(-1); } /* Except for when it belongs to this output */ if (i >= nin) { int dim_offset = ufunc->core_offsets[i]; int num_removed = 0; /* * Fill in 'iter_shape' and 'op_axes' for the core dimensions * of this output. Here, we have to be careful: if keepdims * was used, then the axes are not real core dimensions, but * are being added back for broadcasting, so their size is 1. * If the axis was removed, we should skip altogether. 
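 *
 * For example (illustrative), a gufunc with signature "(i),(i)->()"
 * called with keepdims=True on inputs of shape (5, 3) produces an
 * output of shape (5, 1) rather than (5,): the core axis is re-added
 * here as a broadcastable length-1 dimension.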
*/ if (keepdims) { for (idim = 0; idim < op_core_num_dims[i]; ++idim) { iter_shape[j] = 1; op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim); ++j; } } else { for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) { int core_index = dim_offset + idim; int core_dim_index = ufunc->core_dim_ixs[core_index]; if ((core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING)) { /* skip it */ num_removed++; continue; } iter_shape[j] = core_dim_sizes[ufunc->core_dim_ixs[core_index]]; op_axes_arrays[i][j] = REMAP_AXIS(i, n + idim - num_removed); ++j; } } } op_axes[i] = op_axes_arrays[i]; } #if NPY_UF_DBG_TRACING printf("iter shapes:"); for (j=0; j < iter_ndim; j++) { printf(" %ld", iter_shape[j]); } printf("\n"); #endif /* Get the buffersize and errormask */ if (_get_bufsize_errmask(extobj, ufunc_name, &buffersize, &errormask) < 0) { retval = -1; goto fail; } NPY_UF_DBG_PRINT("Finding inner loop\n"); /* * We don't write to all elements, and the iterator may make * UPDATEIFCOPY temporary copies. The output arrays (unless they are * allocated by the iterator itself) must be considered READWRITE by the * iterator, so that the elements we don't write to are copied to the * possible temporary array. */ _ufunc_setup_flags(ufunc, NPY_ITER_COPY | NPY_UFUNC_DEFAULT_INPUT_FLAGS, NPY_ITER_UPDATEIFCOPY | NPY_ITER_WRITEONLY | NPY_UFUNC_DEFAULT_OUTPUT_FLAGS, op_flags); /* * Set up the iterator per-op flags. For generalized ufuncs, we * can't do buffering, so must COPY or UPDATEIFCOPY. */ iter_flags = ufunc->iter_flags | NPY_ITER_MULTI_INDEX | NPY_ITER_REFS_OK | NPY_ITER_ZEROSIZE_OK | NPY_ITER_COPY_IF_OVERLAP | NPY_ITER_DELAY_BUFALLOC; /* Create the iterator */ iter = NpyIter_AdvancedNew(nop, op, iter_flags, order, NPY_UNSAFE_CASTING, op_flags, operation_descrs, iter_ndim, op_axes, iter_shape, 0); if (iter == NULL) { retval = -1; goto fail; } /* Fill in any allocated outputs */ { PyArrayObject **operands = NpyIter_GetOperandArray(iter); for (i = nin; i < nop; ++i) { if (op[i] == NULL) { op[i] = operands[i]; Py_INCREF(op[i]); } } } /* * Set up the inner strides array. Because we're not doing * buffering, the strides are fixed throughout the looping. */ core_dim_ixs_size = 0; for (i = 0; i < nop; ++i) { core_dim_ixs_size += ufunc->core_num_dims[i]; } inner_strides = (npy_intp *)PyArray_malloc( NPY_SIZEOF_INTP * (nop+core_dim_ixs_size)); if (inner_strides == NULL) { PyErr_NoMemory(); retval = -1; goto fail; } /* Copy the strides after the first nop */ idim = nop; for (i = 0; i < nop; ++i) { /* * Need to use the arrays in the iterator, not op, because * a copy with a different-sized type may have been made. */ PyArrayObject *arr = NpyIter_GetOperandArray(iter)[i]; npy_intp *shape = PyArray_SHAPE(arr); npy_intp *strides = PyArray_STRIDES(arr); /* * Could be negative if flexible dims are used, but not for * keepdims, since those dimensions are allocated in arr. */ int core_start_dim = PyArray_NDIM(arr) - op_core_num_dims[i]; int num_removed = 0; int dim_offset = ufunc->core_offsets[i]; for (j = 0; j < ufunc->core_num_dims[i]; ++j) { int core_dim_index = ufunc->core_dim_ixs[dim_offset + j]; /* * Force zero stride when the shape is 1 (always the case for * missing dimensions), so that broadcasting works right.
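 *
 * With a zero stride the inner loop re-reads the same element on every
 * iteration (conceptually, ptr + i*0 == ptr for all i), which is
 * exactly the semantics of broadcasting a length-1 or missing core
 * dimension.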
*/ if (core_dim_flags[core_dim_index] & UFUNC_CORE_DIM_MISSING) { num_removed++; inner_strides[idim++] = 0; } else { int remapped_axis = REMAP_AXIS(i, core_start_dim + j - num_removed); if (shape[remapped_axis] != 1) { inner_strides[idim++] = strides[remapped_axis]; } else { inner_strides[idim++] = 0; } } } } total_problem_size = NpyIter_GetIterSize(iter); if (total_problem_size < 0) { /* * Only used for threading, if negative (this means that it is * larger than ssize_t before axes removal) assume that the actual * problem is large enough to be threaded usefully. */ total_problem_size = 1000; } /* Remove all the core output dimensions from the iterator */ for (i = broadcast_ndim; i < iter_ndim; ++i) { if (NpyIter_RemoveAxis(iter, broadcast_ndim) != NPY_SUCCEED) { retval = -1; goto fail; } } if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED) { retval = -1; goto fail; } if (NpyIter_EnableExternalLoop(iter) != NPY_SUCCEED) { retval = -1; goto fail; } /* * The first nop strides are for the inner loop (but we can only * copy them after removing the core axes). The strides will not change * if the iterator is not buffered (they are effectively fixed). * Supporting buffering would make sense, but probably would have to be * done in the inner-loop itself (not the iterator). */ assert(!NpyIter_IsBuffered(iter)); memcpy(inner_strides, NpyIter_GetInnerStrideArray(iter), NPY_SIZEOF_INTP * nop); /* Final preparation of the arraymethod call */ PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = operation_descrs, }; PyArrayMethod_StridedLoop *strided_loop; NPY_ARRAYMETHOD_FLAGS flags = 0; if (ufuncimpl->get_strided_loop(&context, 1, 0, inner_strides, &strided_loop, &auxdata, &flags) < 0) { goto fail; } needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; needs_api |= NpyIter_IterationNeedsAPI(iter); if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); } NPY_UF_DBG_PRINT("Executing inner loop\n"); if (NpyIter_GetIterSize(iter) != 0) { /* Do the ufunc loop */ NpyIter_IterNextFunc *iternext; char **dataptr; npy_intp *count_ptr; NPY_BEGIN_THREADS_DEF; /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { retval = -1; goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); count_ptr = NpyIter_GetInnerLoopSizePtr(iter); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(total_problem_size); } do { inner_dimensions[0] = *count_ptr; retval = strided_loop(&context, dataptr, inner_dimensions, inner_strides, auxdata); } while (retval == 0 && iternext(iter)); if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) { NPY_END_THREADS; } } if (retval == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* NOTE: We could check float errors even when `res < 0` */ retval = _check_ufunc_fperr(errormask, extobj, ufunc_name); } PyArray_free(inner_strides); NPY_AUXDATA_FREE(auxdata); if (!NpyIter_Deallocate(iter)) { retval = -1; } PyArray_free(remap_axis_memory); PyArray_free(remap_axis); NPY_UF_DBG_PRINT1("Returning code %d\n", retval); return retval; fail: NPY_UF_DBG_PRINT1("Returning failure code %d\n", retval); PyArray_free(inner_strides); NPY_AUXDATA_FREE(auxdata); NpyIter_Deallocate(iter); PyArray_free(remap_axis_memory); PyArray_free(remap_axis); return retval; } static int PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl, PyArray_Descr *operation_descrs[], PyArrayObject *op[], PyObject
*extobj, NPY_CASTING casting, NPY_ORDER order, PyObject *output_array_prepare[], ufunc_full_args full_args, PyArrayObject *wheremask) { int nin = ufunc->nin, nout = ufunc->nout, nop = nin + nout; const char *ufunc_name = ufunc_get_name_cstr(ufunc); npy_intp default_op_out_flags; npy_uint32 op_flags[NPY_MAXARGS]; /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s\n", ufunc_name); /* Get the buffersize and errormask */ if (_get_bufsize_errmask(extobj, ufunc_name, &buffersize, &errormask) < 0) { return -1; } if (wheremask != NULL) { /* Set up the flags. */ default_op_out_flags = NPY_ITER_NO_SUBTYPE | NPY_ITER_WRITEMASKED | NPY_UFUNC_DEFAULT_OUTPUT_FLAGS; _ufunc_setup_flags(ufunc, NPY_UFUNC_DEFAULT_INPUT_FLAGS, default_op_out_flags, op_flags); } else { /* Set up the flags. */ default_op_out_flags = NPY_ITER_WRITEONLY | NPY_UFUNC_DEFAULT_OUTPUT_FLAGS; _ufunc_setup_flags(ufunc, NPY_UFUNC_DEFAULT_INPUT_FLAGS, default_op_out_flags, op_flags); } /* Final preparation of the arraymethod call */ PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = operation_descrs, }; /* Do the ufunc loop */ if (wheremask != NULL) { NPY_UF_DBG_PRINT("Executing masked inner loop\n"); if (nop + 1 > NPY_MAXARGS) { PyErr_SetString(PyExc_ValueError, "Too many operands when including where= parameter"); return -1; } op[nop] = wheremask; operation_descrs[nop] = NULL; return execute_ufunc_loop(&context, 1, op, order, buffersize, casting, output_array_prepare, full_args, op_flags, errormask, extobj); } else { NPY_UF_DBG_PRINT("Executing normal inner loop\n"); /* * This checks whether a trivial loop is ok, making copies of * scalar and one dimensional operands if that should help. */ int trivial_ok = check_for_trivial_loop(ufuncimpl, op, operation_descrs, casting, buffersize); if (trivial_ok < 0) { return -1; } if (trivial_ok && context.method->nout == 1) { /* Try to handle everything without using the (heavy) iterator */ int retval = try_trivial_single_output_loop(&context, op, order, output_array_prepare, full_args, errormask, extobj); if (retval != -2) { return retval; } } return execute_ufunc_loop(&context, 0, op, order, buffersize, casting, output_array_prepare, full_args, op_flags, errormask, extobj); } } /*UFUNC_API*/ NPY_NO_EXPORT int PyUFunc_GenericFunction(PyUFuncObject *NPY_UNUSED(ufunc), PyObject *NPY_UNUSED(args), PyObject *NPY_UNUSED(kwds), PyArrayObject **NPY_UNUSED(op)) { /* NumPy 1.21, 2020-03-29 */ PyErr_SetString(PyExc_RuntimeError, "The `PyUFunc_GenericFunction()` C-API function has been disabled. " "Please use `PyObject_Call(ufunc, args, kwargs)`, which has " "identical behaviour but allows subclass and `__array_ufunc__` " "override handling and only returns the normal ufunc result."); return -1; } /* * Promote and resolve a reduction like operation. * * @param ufunc * @param arr The operation array * @param out The output array or NULL if not provided. Note that NumPy always * used out to mean the same as `dtype=out.dtype` and never passed * the array itself to the type-resolution. * @param signature The DType signature, which may already be set due to the * dtype passed in by the user, or the special cases (add, multiply). * (Contains strong references and may be modified.) * @param enforce_uniform_args If `NPY_TRUE` fully uniform dtypes/descriptors * are enforced as required for accumulate and (currently) reduceat. 
* @param out_descrs New references to the resolved descriptors (on success). * @param method The ufunc method, "reduce", "reduceat", or "accumulate". * @returns ufuncimpl The `ArrayMethod` implementation to use. Or NULL if an * error occurred. */ static PyArrayMethodObject * reducelike_promote_and_resolve(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, PyArray_DTypeMeta *signature[3], npy_bool enforce_uniform_args, PyArray_Descr *out_descrs[3], NPY_CASTING casting, char *method) { /* * If no dtype is specified and out is not specified, we override the * integer and bool dtype used for add and multiply. * * TODO: The following should be handled by a promoter! */ if (signature[0] == NULL && out == NULL) { /* * For integer types --- make sure at least a long * is used for add and multiply reduction to avoid overflow */ int typenum = PyArray_TYPE(arr); if ((PyTypeNum_ISBOOL(typenum) || PyTypeNum_ISINTEGER(typenum)) && ((strcmp(ufunc->name, "add") == 0) || (strcmp(ufunc->name, "multiply") == 0))) { if (PyTypeNum_ISBOOL(typenum)) { typenum = NPY_LONG; } else if ((size_t)PyArray_DESCR(arr)->elsize < sizeof(long)) { if (PyTypeNum_ISUNSIGNED(typenum)) { typenum = NPY_ULONG; } else { typenum = NPY_LONG; } } signature[0] = PyArray_DTypeFromTypeNum(typenum); } } assert(signature[2] == NULL); /* we always fill it here */ Py_XINCREF(signature[0]); signature[2] = signature[0]; /* * Note that the `ops` is not really correct. But legacy resolution * cannot quite handle the correct ops (e.g. a NULL first item if `out` * is NULL) so we pass `arr` instead in that case. */ PyArrayObject *ops[3] = {out ? out : arr, arr, out}; /* * TODO: This is a dangerous hack, that works by relying on the GIL, it is * terrible, terrifying, and trusts that nobody does crazy stuff * in their type-resolvers. * By mutating the `out` dimension, we ensure that reduce-likes * live in a future without value-based promotion even when legacy * promotion has to be used. */ npy_bool evil_ndim_mutating_hack = NPY_FALSE; if (out != NULL && PyArray_NDIM(out) == 0 && PyArray_NDIM(arr) != 0) { evil_ndim_mutating_hack = NPY_TRUE; ((PyArrayObject_fields *)out)->nd = 1; } /* * TODO: If `out` is not provided, arguably `initial` could define * the first DType (and maybe also the out one), that way * `np.add.reduce([1, 2, 3], initial=3.4)` would return a float * value. As of 1.20, it returned an integer, so that should * probably go to an error/warning first. */ PyArray_DTypeMeta *operation_DTypes[3] = { NULL, NPY_DTYPE(PyArray_DESCR(arr)), NULL}; Py_INCREF(operation_DTypes[1]); if (out != NULL) { operation_DTypes[0] = NPY_DTYPE(PyArray_DESCR(out)); Py_INCREF(operation_DTypes[0]); operation_DTypes[2] = operation_DTypes[0]; Py_INCREF(operation_DTypes[2]); } PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE, NPY_FALSE, NPY_TRUE); if (evil_ndim_mutating_hack) { ((PyArrayObject_fields *)out)->nd = 0; } /* DTypes may currently get filled in fallbacks and XDECREF for error: */ Py_XDECREF(operation_DTypes[0]); Py_XDECREF(operation_DTypes[1]); Py_XDECREF(operation_DTypes[2]); if (ufuncimpl == NULL) { return NULL; } /* * Find the correct descriptors for the operation. We use unsafe casting * for historic reasons: The logic ufuncs required it to cast everything to * boolean. However, we now special case the logical ufuncs, so that the * casting safety could in principle be set to the default same-kind. 
* (although this should possibly happen through a deprecation) */ if (resolve_descriptors(3, ufunc, ufuncimpl, ops, out_descrs, signature, casting) < 0) { return NULL; } /* * The first operand and output should be the same array, so they should * be identical. The second argument can be different for reductions, * but is checked to be identical for accumulate and reduceat. * Ideally, the type-resolver ensures that all are identical, but we do * not enforce this here strictly. Otherwise correct handling of * byte-order changes (or metadata) requires a lot of care; see gh-20699. */ if (!PyArray_EquivTypes(out_descrs[0], out_descrs[2]) || ( enforce_uniform_args && !PyArray_EquivTypes( out_descrs[0], out_descrs[1]))) { PyErr_Format(PyExc_TypeError, "the resolved dtypes are not compatible with %s.%s. " "Resolved (%R, %R, %R)", ufunc_get_name_cstr(ufunc), method, out_descrs[0], out_descrs[1], out_descrs[2]); goto fail; } /* TODO: This really should _not_ be unsafe casting (same above)! */ if (validate_casting(ufuncimpl, ufunc, ops, out_descrs, casting) < 0) { goto fail; } return ufuncimpl; fail: for (int i = 0; i < 3; ++i) { Py_CLEAR(out_descrs[i]); } return NULL; } static int reduce_loop(PyArrayMethod_Context *context, PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata, NpyIter *iter, char **dataptrs, npy_intp const *strides, npy_intp const *countptr, NpyIter_IterNextFunc *iternext, int needs_api, npy_intp skip_first_count) { int retval = 0; char *dataptrs_copy[4]; npy_intp strides_copy[4]; npy_bool masked; NPY_BEGIN_THREADS_DEF; /* Get the number of operands, to determine whether "where" is used */ masked = (NpyIter_GetNOp(iter) == 3); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); } if (skip_first_count > 0) { assert(!masked); /* Path currently not available for masked */ while (1) { npy_intp count = *countptr; /* Skip any first-visit elements */ if (NpyIter_IsFirstVisit(iter, 0)) { if (strides[0] == 0) { --count; --skip_first_count; dataptrs[1] += strides[1]; } else { skip_first_count -= count; count = 0; } } if (count > 0) { /* Turn the two items into three for the inner loop */ dataptrs_copy[0] = dataptrs[0]; dataptrs_copy[1] = dataptrs[1]; dataptrs_copy[2] = dataptrs[0]; strides_copy[0] = strides[0]; strides_copy[1] = strides[1]; strides_copy[2] = strides[0]; retval = strided_loop(context, dataptrs_copy, &count, strides_copy, auxdata); if (retval < 0) { goto finish_loop; } } /* Advance loop, and abort on error (or finish) */ if (!iternext(iter)) { goto finish_loop; } /* When skipping is done break and continue with faster loop */ if (skip_first_count == 0) { break; } } } do { /* Turn the two items into three for the inner loop */ dataptrs_copy[0] = dataptrs[0]; dataptrs_copy[1] = dataptrs[1]; dataptrs_copy[2] = dataptrs[0]; strides_copy[0] = strides[0]; strides_copy[1] = strides[1]; strides_copy[2] = strides[0]; if (masked) { dataptrs_copy[3] = dataptrs[2]; strides_copy[3] = strides[2]; } retval = strided_loop(context, dataptrs_copy, countptr, strides_copy, auxdata); if (retval < 0) { goto finish_loop; } } while (iternext(iter)); finish_loop: NPY_END_THREADS; return retval; } /* * The implementation of the reduction operators with the new iterator * turned into a bit of a long function here, but I think the design * of this part needs to be changed to be more like einsum, so it may * not be worth refactoring it too much. 
Consider this timing: * * >>> a = arange(10000) * * >>> timeit sum(a) * 10000 loops, best of 3: 17 us per loop * * >>> timeit einsum("i->",a) * 100000 loops, best of 3: 13.5 us per loop * * The axes must already be bounds-checked by the calling function; * this function does not validate them. */ static PyArrayObject * PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, int naxes, int *axes, PyArray_DTypeMeta *signature[3], int keepdims, PyObject *initial, PyArrayObject *wheremask) { int iaxes, ndim; npy_bool axis_flags[NPY_MAXDIMS]; const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ int buffersize = 0, errormask = 0; NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s.reduce\n", ufunc_name); ndim = PyArray_NDIM(arr); /* Create an array of flags for reduction */ memset(axis_flags, 0, ndim); for (iaxes = 0; iaxes < naxes; ++iaxes) { int axis = axes[iaxes]; if (axis_flags[axis]) { PyErr_SetString(PyExc_ValueError, "duplicate value in 'axis'"); return NULL; } axis_flags[axis] = 1; } if (_get_bufsize_errmask(NULL, "reduce", &buffersize, &errormask) < 0) { return NULL; } PyArray_Descr *descrs[3]; PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, arr, out, signature, NPY_FALSE, descrs, NPY_UNSAFE_CASTING, "reduce"); if (ufuncimpl == NULL) { return NULL; } PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = descrs, }; PyArrayObject *result = PyUFunc_ReduceWrapper(&context, arr, out, wheremask, axis_flags, keepdims, initial, reduce_loop, buffersize, ufunc_name, errormask); for (int i = 0; i < 3; i++) { Py_DECREF(descrs[i]); } return result; } static PyObject * PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, int axis, PyArray_DTypeMeta *signature[3]) { PyArrayObject *op[2]; int op_axes_arrays[2][NPY_MAXDIMS]; int *op_axes[2] = {op_axes_arrays[0], op_axes_arrays[1]}; npy_uint32 op_flags[2]; int idim, ndim; int needs_api, need_outer_iterator; int res = 0; PyArrayMethod_StridedLoop *strided_loop; NpyAuxData *auxdata = NULL; NpyIter *iter = NULL; /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; const char *ufunc_name = ufunc_get_name_cstr(ufunc); NPY_BEGIN_THREADS_DEF; NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s.accumulate\n", ufunc_name); #if 0 printf("Doing %s.accumulate on array with dtype : ", ufunc_name); PyObject_Print((PyObject *)PyArray_DESCR(arr), stdout, 0); printf("\n"); #endif if (_get_bufsize_errmask(NULL, "accumulate", &buffersize, &errormask) < 0) { return NULL; } /* Take a reference to out for later returning */ Py_XINCREF(out); PyArray_Descr *descrs[3]; PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, arr, out, signature, NPY_TRUE, descrs, NPY_UNSAFE_CASTING, "accumulate"); if (ufuncimpl == NULL) { return NULL; } /* * The below code assumes that all descriptors are interchangeable; we * allow them to not be strictly identical (but they typically should be) */ assert(PyArray_EquivTypes(descrs[0], descrs[1]) && PyArray_EquivTypes(descrs[0], descrs[2])); if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) { /* This can be removed, but the initial element copy needs fixing */ PyErr_SetString(PyExc_TypeError, "accumulation currently only supports `object` dtype with " "references"); goto fail; } PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = descrs, }; ndim = PyArray_NDIM(arr); #if NPY_UF_DBG_TRACING printf("Found %s.accumulate inner loop with
dtype : ", ufunc_name); PyObject_Print((PyObject *)op_dtypes[0], stdout, 0); printf("\n"); #endif /* Set up the op_axes for the outer loop */ for (idim = 0; idim < ndim; ++idim) { op_axes_arrays[0][idim] = idim; op_axes_arrays[1][idim] = idim; } /* The per-operand flags for the outer loop */ op_flags[0] = NPY_ITER_READWRITE | NPY_ITER_NO_BROADCAST | NPY_ITER_ALLOCATE | NPY_ITER_NO_SUBTYPE; op_flags[1] = NPY_ITER_READONLY; op[0] = out; op[1] = arr; need_outer_iterator = (ndim > 1); /* We can't buffer, so must do UPDATEIFCOPY */ if (!PyArray_ISALIGNED(arr) || (out && !PyArray_ISALIGNED(out)) || !PyArray_EquivTypes(descrs[1], PyArray_DESCR(arr)) || (out && !PyArray_EquivTypes(descrs[0], PyArray_DESCR(out)))) { need_outer_iterator = 1; } /* If input and output overlap in memory, use iterator to figure it out */ else if (out != NULL && solve_may_share_memory(out, arr, NPY_MAY_SHARE_BOUNDS) != 0) { need_outer_iterator = 1; } if (need_outer_iterator) { int ndim_iter = 0; npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| NPY_ITER_REFS_OK| NPY_ITER_COPY_IF_OVERLAP; /* * The way accumulate is set up, we can't do buffering, * so make a copy instead when necessary. */ ndim_iter = ndim; flags |= NPY_ITER_MULTI_INDEX; /* * Add some more flags. * * The accumulation outer loop is 'elementwise' over the array, so turn * on NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE. That is, in-place * accumulate(x, out=x) is safe to do without temporary copies. */ op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; NPY_UF_DBG_PRINT("Allocating outer iterator\n"); iter = NpyIter_AdvancedNew(2, op, flags, NPY_KEEPORDER, NPY_UNSAFE_CASTING, op_flags, descrs, ndim_iter, op_axes, NULL, 0); if (iter == NULL) { goto fail; } /* In case COPY or UPDATEIFCOPY occurred */ op[0] = NpyIter_GetOperandArray(iter)[0]; op[1] = NpyIter_GetOperandArray(iter)[1]; if (NpyIter_RemoveAxis(iter, axis) != NPY_SUCCEED) { goto fail; } if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED) { goto fail; } } /* Get the output from the iterator if it was allocated */ if (out == NULL) { if (iter) { op[0] = out = NpyIter_GetOperandArray(iter)[0]; Py_INCREF(out); } else { PyArray_Descr *dtype = descrs[0]; Py_INCREF(dtype); op[0] = out = (PyArrayObject *)PyArray_NewFromDescr( &PyArray_Type, dtype, ndim, PyArray_DIMS(op[1]), NULL, NULL, 0, NULL); if (out == NULL) { goto fail; } } } npy_intp fixed_strides[3]; if (need_outer_iterator) { NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); } else { fixed_strides[0] = PyArray_STRIDES(op[0])[axis]; fixed_strides[1] = PyArray_STRIDES(op[1])[axis]; fixed_strides[2] = fixed_strides[0]; } NPY_ARRAYMETHOD_FLAGS flags = 0; if (ufuncimpl->get_strided_loop(&context, 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { goto fail; } needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); } /* * If the reduction axis has size zero, either return the reduction * unit for UFUNC_REDUCE, or return the zero-sized output array * for UFUNC_ACCUMULATE. 
*/ if (PyArray_DIM(op[1], axis) == 0) { goto finish; } else if (PyArray_SIZE(op[0]) == 0) { goto finish; } if (iter && NpyIter_GetIterSize(iter) != 0) { char *dataptr_copy[3]; npy_intp stride_copy[3]; npy_intp count_m1, stride0, stride1; NpyIter_IterNextFunc *iternext; char **dataptr; int itemsize = descrs[0]->elsize; /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); needs_api |= NpyIter_IterationNeedsAPI(iter); /* Execute the loop with just the outer iterator */ count_m1 = PyArray_DIM(op[1], axis)-1; stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis); NPY_UF_DBG_PRINT("UFunc: Reduce loop with just outer iterator\n"); stride0 = PyArray_STRIDE(op[0], axis); stride_copy[0] = stride0; stride_copy[1] = stride1; stride_copy[2] = stride0; if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); } do { dataptr_copy[0] = dataptr[0]; dataptr_copy[1] = dataptr[1]; dataptr_copy[2] = dataptr[0]; /* * Copy the first element to start the reduction. * * Output (dataptr[0]) and input (dataptr[1]) may point to * the same memory, e.g. np.add.accumulate(a, out=a). */ if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. */ Py_XINCREF(*(PyObject **)dataptr_copy[1]); Py_XDECREF(*(PyObject **)dataptr_copy[0]); *(PyObject **)dataptr_copy[0] = *(PyObject **)dataptr_copy[1]; } else { memmove(dataptr_copy[0], dataptr_copy[1], itemsize); } if (count_m1 > 0) { /* Turn the two items into three for the inner loop */ dataptr_copy[1] += stride1; dataptr_copy[2] += stride0; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count_m1); res = strided_loop(&context, dataptr_copy, &count_m1, stride_copy, auxdata); } } while (res == 0 && iternext(iter)); NPY_END_THREADS; } else if (iter == NULL) { char *dataptr_copy[3]; int itemsize = descrs[0]->elsize; /* Execute the loop with no iterators */ npy_intp count = PyArray_DIM(op[1], axis); npy_intp stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis); NPY_UF_DBG_PRINT("UFunc: Reduce loop with no iterators\n"); if (PyArray_NDIM(op[0]) != PyArray_NDIM(op[1]) || !PyArray_CompareLists(PyArray_DIMS(op[0]), PyArray_DIMS(op[1]), PyArray_NDIM(op[0]))) { PyErr_SetString(PyExc_ValueError, "provided out is the wrong size " "for the accumulation."); goto fail; } stride0 = PyArray_STRIDE(op[0], axis); /* Turn the two items into three for the inner loop */ dataptr_copy[0] = PyArray_BYTES(op[0]); dataptr_copy[1] = PyArray_BYTES(op[1]); dataptr_copy[2] = PyArray_BYTES(op[0]); /* * Copy the first element to start the reduction. * * Output (dataptr[0]) and input (dataptr[1]) may point to the * same memory, e.g. np.add.accumulate(a, out=a). */ if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. 
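 *
 * The ordering matters because the two slots may alias the very same
 * PyObject (e.g. accumulate with out=arr): decref-ing first could drop
 * the reference count to zero and free the object before the incref
 * ran, so the incref must come first.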
*/ Py_XINCREF(*(PyObject **)dataptr_copy[1]); Py_XDECREF(*(PyObject **)dataptr_copy[0]); *(PyObject **)dataptr_copy[0] = *(PyObject **)dataptr_copy[1]; } else { memmove(dataptr_copy[0], dataptr_copy[1], itemsize); } if (count > 1) { --count; dataptr_copy[1] += stride1; dataptr_copy[2] += stride0; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); needs_api = PyDataType_REFCHK(descrs[0]); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(count); } res = strided_loop(&context, dataptr_copy, &count, fixed_strides, auxdata); NPY_END_THREADS; } } finish: NPY_AUXDATA_FREE(auxdata); Py_DECREF(descrs[0]); Py_DECREF(descrs[1]); Py_DECREF(descrs[2]); if (!NpyIter_Deallocate(iter)) { res = -1; } if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* NOTE: We could check float errors even when `res < 0` */ res = _check_ufunc_fperr(errormask, NULL, "accumulate"); } if (res < 0) { Py_DECREF(out); return NULL; } return (PyObject *)out; fail: Py_XDECREF(out); NPY_AUXDATA_FREE(auxdata); Py_XDECREF(descrs[0]); Py_XDECREF(descrs[1]); Py_XDECREF(descrs[2]); NpyIter_Deallocate(iter); return NULL; } /* * Reduceat performs a reduce over an axis using the indices as a guide * * op.reduceat(array,indices) computes * op.reduce(array[indices[i]:indices[i+1]] * for i=0..end with an implicit indices[i+1]=len(array) * assumed when i=end-1 * * if indices[i+1] <= indices[i]+1 * then the result is array[indices[i]] for that value * * op.accumulate(array) is the same as * op.reduceat(array,indices)[::2] * where indices is range(len(array)-1) with a zero placed in every other sample * indices = zeros(len(array)*2-1) * indices[1::2] = range(1,len(array)) * * output shape is based on the size of indices * * TODO: Reduceat duplicates too much code from accumulate! */ static PyObject * PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, PyArrayObject *out, int axis, PyArray_DTypeMeta *signature[3]) { PyArrayObject *op[3]; int op_axes_arrays[3][NPY_MAXDIMS]; int *op_axes[3] = {op_axes_arrays[0], op_axes_arrays[1], op_axes_arrays[2]}; npy_uint32 op_flags[3]; int idim, ndim; int needs_api, need_outer_iterator = 0; int res = 0; NpyIter *iter = NULL; PyArrayMethod_StridedLoop *strided_loop; NpyAuxData *auxdata = NULL; /* The reduceat indices - ind must be validated outside this call */ npy_intp *reduceat_ind; npy_intp i, ind_size, red_axis_size; const char *ufunc_name = ufunc_get_name_cstr(ufunc); char *opname = "reduceat"; /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; NPY_BEGIN_THREADS_DEF; reduceat_ind = (npy_intp *)PyArray_DATA(ind); ind_size = PyArray_DIM(ind, 0); red_axis_size = PyArray_DIM(arr, axis); /* Check for out-of-bounds values in indices array */ for (i = 0; i < ind_size; ++i) { if (reduceat_ind[i] < 0 || reduceat_ind[i] >= red_axis_size) { PyErr_Format(PyExc_IndexError, "index %" NPY_INTP_FMT " out-of-bounds in %s.%s [0, %" NPY_INTP_FMT ")", reduceat_ind[i], ufunc_name, opname, red_axis_size); return NULL; } } NPY_UF_DBG_PRINT2("\nEvaluating ufunc %s.%s\n", ufunc_name, opname); #if 0 printf("Doing %s.%s on array with dtype : ", ufunc_name, opname); PyObject_Print((PyObject *)PyArray_DESCR(arr), stdout, 0); printf("\n"); printf("Index size is %d\n", (int)ind_size); #endif if (_get_bufsize_errmask(NULL, opname, &buffersize, &errormask) < 0) { return NULL; } /* Take a reference to out for later returning */ Py_XINCREF(out); PyArray_Descr *descrs[3]; PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, arr, 
out, signature, NPY_TRUE, descrs, NPY_UNSAFE_CASTING, "reduceat"); if (ufuncimpl == NULL) { return NULL; } /* * The below code assumes that all descriptors are interchangeable; we * allow them to not be strictly identical (but they typically should be) */ assert(PyArray_EquivTypes(descrs[0], descrs[1]) && PyArray_EquivTypes(descrs[0], descrs[2])); if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) { /* This can be removed, but the initial element copy needs fixing */ PyErr_SetString(PyExc_TypeError, "reduceat currently only supports `object` dtype with " "references"); goto fail; } PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = descrs, }; ndim = PyArray_NDIM(arr); #if NPY_UF_DBG_TRACING printf("Found %s.%s inner loop with dtype : ", ufunc_name, opname); PyObject_Print((PyObject *)descrs[0], stdout, 0); printf("\n"); #endif /* Set up the op_axes for the outer loop */ for (idim = 0; idim < ndim; ++idim) { /* Use the i-th iteration dimension to match up ind */ if (idim == axis) { op_axes_arrays[0][idim] = axis; op_axes_arrays[1][idim] = -1; op_axes_arrays[2][idim] = 0; } else { op_axes_arrays[0][idim] = idim; op_axes_arrays[1][idim] = idim; op_axes_arrays[2][idim] = -1; } } op[0] = out; op[1] = arr; op[2] = ind; if (out != NULL || ndim > 1 || !PyArray_ISALIGNED(arr) || !PyArray_EquivTypes(descrs[0], PyArray_DESCR(arr))) { need_outer_iterator = 1; } if (need_outer_iterator) { PyArray_Descr *op_dtypes[3] = {descrs[0], descrs[1], NULL}; npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| NPY_ITER_REFS_OK| NPY_ITER_MULTI_INDEX| NPY_ITER_COPY_IF_OVERLAP; /* * The way reduceat is set up, we can't do buffering, * so make a copy instead when necessary using * the UPDATEIFCOPY flag */ /* The per-operand flags for the outer loop */ op_flags[0] = NPY_ITER_READWRITE| NPY_ITER_NO_BROADCAST| NPY_ITER_ALLOCATE| NPY_ITER_NO_SUBTYPE| NPY_ITER_UPDATEIFCOPY| NPY_ITER_ALIGNED; op_flags[1] = NPY_ITER_READONLY| NPY_ITER_COPY| NPY_ITER_ALIGNED; op_flags[2] = NPY_ITER_READONLY; op_dtypes[1] = op_dtypes[0]; NPY_UF_DBG_PRINT("Allocating outer iterator\n"); iter = NpyIter_AdvancedNew(3, op, flags, NPY_KEEPORDER, NPY_UNSAFE_CASTING, op_flags, op_dtypes, ndim, op_axes, NULL, 0); if (iter == NULL) { goto fail; } /* Remove the inner loop axis from the outer iterator */ if (NpyIter_RemoveAxis(iter, axis) != NPY_SUCCEED) { goto fail; } if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED) { goto fail; } /* In case COPY or UPDATEIFCOPY occurred */ op[0] = NpyIter_GetOperandArray(iter)[0]; op[1] = NpyIter_GetOperandArray(iter)[1]; op[2] = NpyIter_GetOperandArray(iter)[2]; if (out == NULL) { out = op[0]; Py_INCREF(out); } } else { /* * Allocate the output for when there's no outer iterator; we always * use the outer_iteration path when `out` is passed.
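 *
 * In this no-iterator path the result is always 1-D with one slot per
 * reduceat index; e.g. (illustrative) np.add.reduceat(np.arange(8),
 * [0, 4, 6]) allocates a shape (3,) output here.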
*/ assert(out == NULL); Py_INCREF(descrs[0]); op[0] = out = (PyArrayObject *)PyArray_NewFromDescr( &PyArray_Type, descrs[0], 1, &ind_size, NULL, NULL, 0, NULL); if (out == NULL) { goto fail; } } npy_intp fixed_strides[3]; if (need_outer_iterator) { NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); } else { fixed_strides[1] = PyArray_STRIDES(op[1])[axis]; } /* The reduce axis does not advance here in the strided-loop */ fixed_strides[0] = 0; fixed_strides[2] = 0; NPY_ARRAYMETHOD_FLAGS flags = 0; if (ufuncimpl->get_strided_loop(&context, 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { goto fail; } needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); } /* * If the output has zero elements, return now. */ if (PyArray_SIZE(op[0]) == 0) { goto finish; } if (iter && NpyIter_GetIterSize(iter) != 0) { char *dataptr_copy[3]; npy_intp stride_copy[3]; NpyIter_IterNextFunc *iternext; char **dataptr; npy_intp count_m1; npy_intp stride0, stride1; npy_intp stride0_ind = PyArray_STRIDE(op[0], axis); int itemsize = descrs[0]->elsize; needs_api |= NpyIter_IterationNeedsAPI(iter); /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); if (iternext == NULL) { goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); /* Execute the loop with just the outer iterator */ count_m1 = PyArray_DIM(op[1], axis)-1; stride0 = 0; stride1 = PyArray_STRIDE(op[1], axis); NPY_UF_DBG_PRINT("UFunc: Reduce loop with just outer iterator\n"); stride_copy[0] = stride0; stride_copy[1] = stride1; stride_copy[2] = stride0; if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); } do { for (i = 0; i < ind_size; ++i) { npy_intp start = reduceat_ind[i], end = (i == ind_size-1) ? count_m1+1 : reduceat_ind[i+1]; npy_intp count = end - start; dataptr_copy[0] = dataptr[0] + stride0_ind*i; dataptr_copy[1] = dataptr[1] + stride1*start; dataptr_copy[2] = dataptr[0] + stride0_ind*i; /* * Copy the first element to start the reduction. * * Output (dataptr[0]) and input (dataptr[1]) may point * to the same memory, e.g. * np.add.reduceat(a, np.arange(len(a)), out=a). */ if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of * the reference count being zero temporarily. */ Py_XINCREF(*(PyObject **)dataptr_copy[1]); Py_XDECREF(*(PyObject **)dataptr_copy[0]); *(PyObject **)dataptr_copy[0] = *(PyObject **)dataptr_copy[1]; } else { memmove(dataptr_copy[0], dataptr_copy[1], itemsize); } if (count > 1) { /* Inner loop like REDUCE */ --count; dataptr_copy[1] += stride1; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); res = strided_loop(&context, dataptr_copy, &count, stride_copy, auxdata); } } } while (res == 0 && iternext(iter)); NPY_END_THREADS; } else if (iter == NULL) { char *dataptr_copy[3]; int itemsize = descrs[0]->elsize; npy_intp stride0_ind = PyArray_STRIDE(op[0], axis); npy_intp stride1 = PyArray_STRIDE(op[1], axis); NPY_UF_DBG_PRINT("UFunc: Reduce loop with no iterators\n"); if (!needs_api) { NPY_BEGIN_THREADS; } for (i = 0; i < ind_size; ++i) { npy_intp start = reduceat_ind[i], end = (i == ind_size-1) ? 
PyArray_DIM(arr,axis) : reduceat_ind[i+1]; npy_intp count = end - start; dataptr_copy[0] = PyArray_BYTES(op[0]) + stride0_ind*i; dataptr_copy[1] = PyArray_BYTES(op[1]) + stride1*start; dataptr_copy[2] = PyArray_BYTES(op[0]) + stride0_ind*i; /* * Copy the first element to start the reduction. * * Output (dataptr[0]) and input (dataptr[1]) may point to * the same memory, e.g. * np.add.reduceat(a, np.arange(len(a)), out=a). */ if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. */ Py_XINCREF(*(PyObject **)dataptr_copy[1]); Py_XDECREF(*(PyObject **)dataptr_copy[0]); *(PyObject **)dataptr_copy[0] = *(PyObject **)dataptr_copy[1]; } else { memmove(dataptr_copy[0], dataptr_copy[1], itemsize); } if (count > 1) { /* Inner loop like REDUCE */ --count; dataptr_copy[1] += stride1; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); res = strided_loop(&context, dataptr_copy, &count, fixed_strides, auxdata); if (res != 0) { break; } } } NPY_END_THREADS; } finish: NPY_AUXDATA_FREE(auxdata); Py_DECREF(descrs[0]); Py_DECREF(descrs[1]); Py_DECREF(descrs[2]); if (!NpyIter_Deallocate(iter)) { res = -1; } if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* NOTE: We could check float errors even when `res < 0` */ res = _check_ufunc_fperr(errormask, NULL, "reduceat"); } if (res < 0) { Py_DECREF(out); return NULL; } return (PyObject *)out; fail: Py_XDECREF(out); NPY_AUXDATA_FREE(auxdata); Py_XDECREF(descrs[0]); Py_XDECREF(descrs[1]); Py_XDECREF(descrs[2]); NpyIter_Deallocate(iter); return NULL; } static npy_bool tuple_all_none(PyObject *tup) { npy_intp i; for (i = 0; i < PyTuple_GET_SIZE(tup); ++i) { if (PyTuple_GET_ITEM(tup, i) != Py_None) { return NPY_FALSE; } } return NPY_TRUE; } static int _set_full_args_out(int nout, PyObject *out_obj, ufunc_full_args *full_args) { if (PyTuple_CheckExact(out_obj)) { if (PyTuple_GET_SIZE(out_obj) != nout) { PyErr_SetString(PyExc_ValueError, "The 'out' tuple must have exactly " "one entry per ufunc output"); return -1; } if (tuple_all_none(out_obj)) { return 0; } else { Py_INCREF(out_obj); full_args->out = out_obj; } } else if (nout == 1) { if (out_obj == Py_None) { return 0; } /* Can be an array if it only has one output */ full_args->out = PyTuple_Pack(1, out_obj); if (full_args->out == NULL) { return -1; } } else { PyErr_SetString(PyExc_TypeError, nout > 1 ? "'out' must be a tuple of arrays" : "'out' must be an array or a tuple with " "a single array"); return -1; } return 0; } /* * Convert function which replaces np._NoValue with NULL. * As a converter returns 0 on error and 1 on success. 
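 *
 * This makes an explicitly passed np._NoValue indistinguishable from an
 * omitted argument; e.g. (illustrative) np.add.reduce(a,
 * initial=np._NoValue) behaves exactly like np.add.reduce(a), since the
 * parsed C value stays NULL in both cases.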
*/ static int _not_NoValue(PyObject *obj, PyObject **out) { static PyObject *NoValue = NULL; npy_cache_import("numpy", "_NoValue", &NoValue); if (NoValue == NULL) { return 0; } if (obj == NoValue) { *out = NULL; } else { *out = obj; } return 1; } /* forward declaration */ static PyArray_DTypeMeta * _get_dtype(PyObject *dtype_obj); /* * This code handles reduce, reduceat, and accumulate * (accumulate and reduce are special cases of the more general reduceat * but they are handled separately for speed) */ static PyObject * PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames, int operation) { int i, naxes=0, ndim; int axes[NPY_MAXDIMS]; ufunc_full_args full_args = {NULL, NULL}; PyObject *axes_obj = NULL; PyArrayObject *mp = NULL, *wheremask = NULL, *ret = NULL; PyObject *op = NULL; PyArrayObject *indices = NULL; PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL}; PyArrayObject *out = NULL; int keepdims = 0; PyObject *initial = NULL; npy_bool out_is_passed_by_position; static char *_reduce_type[] = {"reduce", "accumulate", "reduceat", NULL}; if (ufunc == NULL) { PyErr_SetString(PyExc_ValueError, "function not supported"); return NULL; } if (ufunc->core_enabled) { PyErr_Format(PyExc_RuntimeError, "Reduction not defined on ufunc with signature"); return NULL; } if (ufunc->nin != 2) { PyErr_Format(PyExc_ValueError, "%s only supported for binary functions", _reduce_type[operation]); return NULL; } if (ufunc->nout != 1) { PyErr_Format(PyExc_ValueError, "%s only supported for functions " "returning a single value", _reduce_type[operation]); return NULL; } /* * Perform argument parsing, but start by only extracting. This is * just to preserve the behaviour that __array_ufunc__ did not perform * any checks on arguments, and we could change this or change it for * certain parameters. */ PyObject *otype_obj = NULL, *out_obj = NULL, *indices_obj = NULL; PyObject *keepdims_obj = NULL, *wheremask_obj = NULL; if (operation == UFUNC_REDUCEAT) { NPY_PREPARE_ARGPARSER; if (npy_parse_arguments("reduceat", args, len_args, kwnames, "array", NULL, &op, "indices", NULL, &indices_obj, "|axis", NULL, &axes_obj, "|dtype", NULL, &otype_obj, "|out", NULL, &out_obj, NULL, NULL, NULL) < 0) { goto fail; } /* Prepare inputs for PyUfunc_CheckOverride */ full_args.in = PyTuple_Pack(2, op, indices_obj); if (full_args.in == NULL) { goto fail; } out_is_passed_by_position = len_args >= 5; } else if (operation == UFUNC_ACCUMULATE) { NPY_PREPARE_ARGPARSER; if (npy_parse_arguments("accumulate", args, len_args, kwnames, "array", NULL, &op, "|axis", NULL, &axes_obj, "|dtype", NULL, &otype_obj, "|out", NULL, &out_obj, NULL, NULL, NULL) < 0) { goto fail; } /* Prepare input for PyUfunc_CheckOverride */ full_args.in = PyTuple_Pack(1, op); if (full_args.in == NULL) { goto fail; } out_is_passed_by_position = len_args >= 4; } else { NPY_PREPARE_ARGPARSER; if (npy_parse_arguments("reduce", args, len_args, kwnames, "array", NULL, &op, "|axis", NULL, &axes_obj, "|dtype", NULL, &otype_obj, "|out", NULL, &out_obj, "|keepdims", NULL, &keepdims_obj, "|initial", &_not_NoValue, &initial, "|where", NULL, &wheremask_obj, NULL, NULL, NULL) < 0) { goto fail; } /* Prepare input for PyUfunc_CheckOverride */ full_args.in = PyTuple_Pack(1, op); if (full_args.in == NULL) { goto fail; } out_is_passed_by_position = len_args >= 4; } /* Normalize output for PyUFunc_CheckOverride and conversion. */ if (out_is_passed_by_position) { /* in this branch, out is always wrapped in a tuple. 
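 * For example (illustrative), ufunc.reduce(arr, 0, None, out_arr) with
 * out passed positionally is normalized to full_args.out == (out_arr,),
 * the same as the keyword spelling out=(out_arr,).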
*/ if (out_obj != Py_None) { full_args.out = PyTuple_Pack(1, out_obj); if (full_args.out == NULL) { goto fail; } } } else if (out_obj) { if (_set_full_args_out(1, out_obj, &full_args) < 0) { goto fail; } /* Ensure that out_obj is the array, not the tuple: */ if (full_args.out != NULL) { out_obj = PyTuple_GET_ITEM(full_args.out, 0); } } /* We now have all the information required to check for Overrides */ PyObject *override = NULL; int errval = PyUFunc_CheckOverride(ufunc, _reduce_type[operation], full_args.in, full_args.out, wheremask_obj, args, len_args, kwnames, &override); if (errval) { return NULL; } else if (override) { Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); return override; } /* Finish parsing of all parameters (no matter which reduce-like) */ if (indices_obj) { PyArray_Descr *indtype = PyArray_DescrFromType(NPY_INTP); indices = (PyArrayObject *)PyArray_FromAny(indices_obj, indtype, 1, 1, NPY_ARRAY_CARRAY, NULL); if (indices == NULL) { goto fail; } } if (otype_obj && otype_obj != Py_None) { /* Use `_get_dtype` because `dtype` is a DType and not the instance */ signature[0] = _get_dtype(otype_obj); if (signature[0] == NULL) { goto fail; } } if (out_obj && !PyArray_OutputConverter(out_obj, &out)) { goto fail; } if (keepdims_obj && !PyArray_PythonPyIntFromInt(keepdims_obj, &keepdims)) { goto fail; } if (wheremask_obj && !_wheremask_converter(wheremask_obj, &wheremask)) { goto fail; } /* Ensure input is an array */ mp = (PyArrayObject *)PyArray_FromAny(op, NULL, 0, 0, 0, NULL); if (mp == NULL) { goto fail; } ndim = PyArray_NDIM(mp); /* Convert the 'axis' parameter into a list of axes */ if (axes_obj == NULL) { /* apply defaults */ if (ndim == 0) { naxes = 0; } else { naxes = 1; axes[0] = 0; } } else if (axes_obj == Py_None) { /* Convert 'None' into all the axes */ naxes = ndim; for (i = 0; i < naxes; ++i) { axes[i] = i; } } else if (PyTuple_Check(axes_obj)) { naxes = PyTuple_Size(axes_obj); if (naxes < 0 || naxes > NPY_MAXDIMS) { PyErr_SetString(PyExc_ValueError, "too many values for 'axis'"); goto fail; } for (i = 0; i < naxes; ++i) { PyObject *tmp = PyTuple_GET_ITEM(axes_obj, i); int axis = PyArray_PyIntAsInt(tmp); if (error_converting(axis)) { goto fail; } if (check_and_adjust_axis(&axis, ndim) < 0) { goto fail; } axes[i] = (int)axis; } } else { /* Try to interpret axis as an integer */ int axis = PyArray_PyIntAsInt(axes_obj); /* TODO: PyNumber_Index would be good to use here */ if (error_converting(axis)) { goto fail; } /* * As a special case for backwards compatibility in 'sum', * 'prod', et al, also allow a reduction for scalars even * though this is technically incorrect. 
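 *
 * E.g. (illustrative) np.sum(5, axis=0) succeeds and returns 5 even
 * though a 0-d array has no axis 0; only axis == 0 and axis == -1
 * receive this exception below.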
*/ if (ndim == 0 && (axis == 0 || axis == -1)) { naxes = 0; } else if (check_and_adjust_axis(&axis, ndim) < 0) { goto fail; } else { axes[0] = (int)axis; naxes = 1; } } switch(operation) { case UFUNC_REDUCE: ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes, signature, keepdims, initial, wheremask); Py_XSETREF(wheremask, NULL); break; case UFUNC_ACCUMULATE: if (ndim == 0) { PyErr_SetString(PyExc_TypeError, "cannot accumulate on a scalar"); goto fail; } if (naxes != 1) { PyErr_SetString(PyExc_ValueError, "accumulate does not allow multiple axes"); goto fail; } ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0], signature); break; case UFUNC_REDUCEAT: if (ndim == 0) { PyErr_SetString(PyExc_TypeError, "cannot reduceat on a scalar"); goto fail; } if (naxes != 1) { PyErr_SetString(PyExc_ValueError, "reduceat does not allow multiple axes"); goto fail; } ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc, mp, indices, out, axes[0], signature); Py_SETREF(indices, NULL); break; } if (ret == NULL) { goto fail; } Py_DECREF(signature[0]); Py_DECREF(signature[1]); Py_DECREF(signature[2]); Py_DECREF(mp); Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); /* Wrap and return the output */ { /* Find __array_wrap__ - note that these rules are different to the * normal ufunc path */ PyObject *wrap; if (out != NULL) { wrap = Py_None; Py_INCREF(wrap); } else if (Py_TYPE(op) != Py_TYPE(ret)) { wrap = PyObject_GetAttr(op, npy_um_str_array_wrap); if (wrap == NULL) { PyErr_Clear(); } else if (!PyCallable_Check(wrap)) { Py_DECREF(wrap); wrap = NULL; } } else { wrap = NULL; } return _apply_array_wrap(wrap, ret, NULL); } fail: Py_XDECREF(signature[0]); Py_XDECREF(signature[1]); Py_XDECREF(signature[2]); Py_XDECREF(mp); Py_XDECREF(wheremask); Py_XDECREF(indices); Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); return NULL; } /* * Perform a basic check on `dtype`, `sig`, and `signature` since only one * may be set. If `sig` is used, writes it into `out_signature` (which should * be set to `signature_obj` so that following code only requires to handle * `signature_obj`). * * Does NOT incref the output! This only copies the borrowed references * gotten during the argument parsing. * * This function does not do any normalization of the input dtype tuples, * this happens after the array-ufunc override check currently. */ static int _check_and_copy_sig_to_signature( PyObject *sig_obj, PyObject *signature_obj, PyObject *dtype, PyObject **out_signature) { *out_signature = NULL; if (signature_obj != NULL) { *out_signature = signature_obj; } if (sig_obj != NULL) { if (*out_signature != NULL) { PyErr_SetString(PyExc_TypeError, "cannot specify both 'sig' and 'signature'"); *out_signature = NULL; return -1; } *out_signature = sig_obj; } if (dtype != NULL) { if (*out_signature != NULL) { PyErr_SetString(PyExc_TypeError, "cannot specify both 'signature' and 'dtype'"); return -1; } /* dtype needs to be converted, delay after the override check */ } return 0; } /* * Note: This function currently lets DType classes pass, but in general * the class (not the descriptor instance) is the preferred input, so the * parsing should eventually be adapted to prefer classes and possible * deprecated instances. (Users should not notice that much, since `np.float64` * or "float64" usually denotes the DType class rather than the instance.) 
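 *
 * For example (illustrative), dtype=np.float64 and dtype="float64" both
 * resolve to the Float64 DType class below, while a descriptor carrying
 * extra detail that cannot be reduced to its class (such as a
 * datetime64 with an explicit time unit) is rejected with a TypeError.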
*/ static PyArray_DTypeMeta * _get_dtype(PyObject *dtype_obj) { if (PyObject_TypeCheck(dtype_obj, &PyArrayDTypeMeta_Type)) { Py_INCREF(dtype_obj); return (PyArray_DTypeMeta *)dtype_obj; } else { PyArray_Descr *descr = NULL; if (!PyArray_DescrConverter(dtype_obj, &descr)) { return NULL; } PyArray_DTypeMeta *out = NPY_DTYPE(descr); if (NPY_UNLIKELY(!NPY_DT_is_legacy(out))) { /* TODO: this path was unreachable when added. */ PyErr_SetString(PyExc_TypeError, "Cannot pass a new user DType instance to the `dtype` or " "`signature` arguments of ufuncs. Pass the DType class " "instead."); Py_DECREF(descr); return NULL; } else if (NPY_UNLIKELY(out->singleton != descr)) { /* This does not warn about `metadata`, but units are important. */ if (out->singleton == NULL || !PyArray_EquivTypes(out->singleton, descr)) { PyErr_SetString(PyExc_TypeError, "The `dtype` and `signature` arguments to " "ufuncs only select the general DType and not details " "such as the byte order or time unit. " "You can avoid this error by using the scalar types " "`np.float64` or the dtype string notation."); Py_DECREF(descr); return NULL; } } Py_INCREF(out); Py_DECREF(descr); return out; } } /* * Finish conversion parsing of the DType signature. NumPy has always * honored only the type number of passed-in descriptors/dtypes. * The `dtype` argument is interpreted as the first output DType (not * descriptor). * Unlike the dtype of an `out` array, it influences loop selection! * * It is the caller's responsibility to clean `signature` and NULL it before * calling. */ static int _get_fixed_signature(PyUFuncObject *ufunc, PyObject *dtype_obj, PyObject *signature_obj, PyArray_DTypeMeta **signature) { if (dtype_obj == NULL && signature_obj == NULL) { return 0; } int nin = ufunc->nin, nout = ufunc->nout, nop = nin + nout; if (dtype_obj != NULL) { if (dtype_obj == Py_None) { /* If `dtype=None` is passed, no need to do anything */ return 0; } if (nout == 0) { /* This may be allowed (NumPy does not do this)? */ PyErr_SetString(PyExc_TypeError, "Cannot provide `dtype` when a ufunc has no outputs"); return -1; } PyArray_DTypeMeta *dtype = _get_dtype(dtype_obj); if (dtype == NULL) { return -1; } for (int i = nin; i < nop; i++) { Py_INCREF(dtype); signature[i] = dtype; } Py_DECREF(dtype); return 0; } assert(signature_obj != NULL); /* Fill in specified_types from the tuple or string (signature_obj) */ if (PyTuple_Check(signature_obj)) { Py_ssize_t n = PyTuple_GET_SIZE(signature_obj); if (n == 1 && nop != 1) { /* * Special handling, because we deprecate this path. The path * probably mainly existed since the `dtype=obj` was passed through * as `(obj,)` and parsed later. */ if (PyTuple_GET_ITEM(signature_obj, 0) == Py_None) { PyErr_SetString(PyExc_TypeError, "a single item type tuple cannot contain None."); return -1; } if (DEPRECATE("The use of a length 1 tuple for the ufunc " "`signature` is deprecated.
Use `dtype` or fill the " "tuple with `None`s.") < 0) { return -1; } /* Use the same logic as for `dtype=` */ return _get_fixed_signature(ufunc, PyTuple_GET_ITEM(signature_obj, 0), NULL, signature); } if (n != nop) { PyErr_Format(PyExc_ValueError, "a type-tuple must be specified of length %d for ufunc '%s'", nop, ufunc_get_name_cstr(ufunc)); return -1; } for (int i = 0; i < nop; ++i) { PyObject *item = PyTuple_GET_ITEM(signature_obj, i); if (item == Py_None) { continue; } else { signature[i] = _get_dtype(item); if (signature[i] == NULL) { return -1; } else if (i < nin && NPY_DT_is_abstract(signature[i])) { /* * We reject abstract input signatures for now. These * can probably be defined by finding the common DType with * the actual input and using the result of this for the * promotion. */ PyErr_SetString(PyExc_TypeError, "Input DTypes to the signature must not be " "abstract. The behaviour may be defined in the " "future."); return -1; } } } } else if (PyBytes_Check(signature_obj) || PyUnicode_Check(signature_obj)) { PyObject *str_object = NULL; if (PyBytes_Check(signature_obj)) { str_object = PyUnicode_FromEncodedObject(signature_obj, NULL, NULL); if (str_object == NULL) { return -1; } } else { Py_INCREF(signature_obj); str_object = signature_obj; } Py_ssize_t length; const char *str = PyUnicode_AsUTF8AndSize(str_object, &length); if (str == NULL) { Py_DECREF(str_object); return -1; } if (length != 1 && (length != nin+nout + 2 || str[nin] != '-' || str[nin+1] != '>')) { PyErr_Format(PyExc_ValueError, "a type-string for %s requires %d typecode(s) before and %d after " "the -> sign", ufunc_get_name_cstr(ufunc), nin, nout); Py_DECREF(str_object); return -1; } if (length == 1 && nin+nout != 1) { if (DEPRECATE("The use of a length 1 string for the ufunc " "`signature` is deprecated. Use `dtype` attribute or " "pass a tuple with `None`s.") < 0) { Py_DECREF(str_object); return -1; } /* `signature="l"` is the same as `dtype="l"`; keep str_object alive until after the recursive call */ int res = _get_fixed_signature(ufunc, str_object, NULL, signature); Py_DECREF(str_object); return res; } else { for (int i = 0; i < nin+nout; ++i) { npy_intp istr = i < nin ? i : i+2; PyArray_Descr *descr = PyArray_DescrFromType(str[istr]); if (descr == NULL) { Py_DECREF(str_object); return -1; } signature[i] = NPY_DTYPE(descr); Py_INCREF(signature[i]); Py_DECREF(descr); } Py_DECREF(str_object); } } else { PyErr_SetString(PyExc_TypeError, "the signature object to ufunc must be a string or a tuple."); return -1; } return 0; } /* * Fill in the actual descriptors used for the operation. This function * supports falling back to the legacy `ufunc->type_resolver`. * * We guarantee the array-method that all passed in descriptors are of the * correct DType instance (i.e. a string can just fetch the length, it doesn't * need to "cast" to string first). */ static int resolve_descriptors(int nop, PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl, PyArrayObject *operands[], PyArray_Descr *dtypes[], PyArray_DTypeMeta *signature[], NPY_CASTING casting) { int retval = -1; PyArray_Descr *original_dtypes[NPY_MAXARGS]; for (int i = 0; i < nop; ++i) { if (operands[i] == NULL) { original_dtypes[i] = NULL; } else { /* * The dtype may mismatch the signature, in which case we need * to make it fit before calling the resolution.
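 *
 * E.g. (illustrative) if a float64 operand meets a fixed Float32
 * signature entry, PyArray_CastDescrToDType first produces a float32
 * descriptor, so the resolver below only ever sees descriptors of its
 * own DTypes.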
*/ PyArray_Descr *descr = PyArray_DTYPE(operands[i]); original_dtypes[i] = PyArray_CastDescrToDType(descr, signature[i]); if (original_dtypes[i] == NULL) { nop = i; /* only this much is initialized */ goto finish; } } } NPY_UF_DBG_PRINT("Resolving the descriptors\n"); if (ufuncimpl->resolve_descriptors != &wrapped_legacy_resolve_descriptors) { /* The default: use the `ufuncimpl` as nature intended it */ npy_intp view_offset = NPY_MIN_INTP; /* currently ignored */ NPY_CASTING safety = ufuncimpl->resolve_descriptors(ufuncimpl, signature, original_dtypes, dtypes, &view_offset); if (safety < 0) { goto finish; } if (NPY_UNLIKELY(PyArray_MinCastSafety(safety, casting) != casting)) { /* TODO: Currently impossible to reach (specialized unsafe loop) */ PyErr_Format(PyExc_TypeError, "The ufunc implementation for %s with the given dtype " "signature is not possible under the casting rule %s", ufunc_get_name_cstr(ufunc), npy_casting_to_string(casting)); goto finish; } retval = 0; } else { /* * Fall-back to legacy resolver using `operands`, used exclusively * for datetime64/timedelta64 and custom ufuncs (in pyerfa/astropy). */ retval = ufunc->type_resolver(ufunc, casting, operands, NULL, dtypes); } finish: for (int i = 0; i < nop; i++) { Py_XDECREF(original_dtypes[i]); } return retval; } /** * Wraps all outputs and returns the result (which may be NULL on error). * * Use __array_wrap__ on all outputs * if present on one of the input arguments. * If present for multiple inputs: * use __array_wrap__ of input object with largest * __array_priority__ (default = 0.0) * * Exception: we should not wrap outputs for items already * passed in as output-arguments. These items should either * be left unwrapped or wrapped by calling their own __array_wrap__ * routine. * * For each output argument, wrap will be either * NULL --- call PyArray_Return() -- default if no output arguments given * None --- array-object passed in don't call PyArray_Return * method --- the __array_wrap__ method to call. * * @param ufunc * @param full_args Original inputs and outputs * @param subok Whether subclasses are allowed * @param result_arrays The ufunc result(s). REFERENCES ARE STOLEN! */ static PyObject * replace_with_wrapped_result_and_return(PyUFuncObject *ufunc, ufunc_full_args full_args, npy_bool subok, PyArrayObject *result_arrays[]) { PyObject *retobj[NPY_MAXARGS]; PyObject *wraparr[NPY_MAXARGS]; _find_array_wrap(full_args, subok, wraparr, ufunc->nin, ufunc->nout); /* wrap outputs */ for (int i = 0; i < ufunc->nout; i++) { _ufunc_context context; context.ufunc = ufunc; context.args = full_args; context.out_i = i; retobj[i] = _apply_array_wrap(wraparr[i], result_arrays[i], &context); result_arrays[i] = NULL; /* Was DECREF'ed and (probably) wrapped */ if (retobj[i] == NULL) { goto fail; } } if (ufunc->nout == 1) { return retobj[0]; } else { PyObject *result = PyTuple_New(ufunc->nout); if (result == NULL) { return NULL; } for (int i = 0; i < ufunc->nout; i++) { PyTuple_SET_ITEM(result, i, retobj[i]); } return result; } fail: for (int i = 0; i < ufunc->nout; i++) { if (result_arrays[i] != NULL) { Py_DECREF(result_arrays[i]); } else { Py_XDECREF(retobj[i]); } } return NULL; } /* * Main ufunc call implementation. * * This implementation makes use of the "fastcall" way of passing keyword * arguments and is called directly from `ufunc_generic_vectorcall` when * Python has `tp_vectorcall` (Python 3.8+). * If `tp_vectorcall` is not available, the dictionary `kwargs` are unpacked in * `ufunc_generic_call` with fairly little overhead. 
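 *
 * A minimal sketch of the calling side (illustrative only; it assumes the
 * public `PyObject_Vectorcall` of Python 3.9+ and `np_add` holding a
 * reference to `np.add` obtained elsewhere):
 *
 *     PyObject *argv[] = {arr1, arr2};
 *     PyObject *res = PyObject_Vectorcall(np_add, argv, 2, NULL);
 *
 * This dispatches through `tp_vectorcall` to `ufunc_generic_vectorcall`
 * and then into this function.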
*/
static PyObject *
ufunc_generic_fastcall(PyUFuncObject *ufunc,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames,
        npy_bool outer)
{
    int errval;
    int nin = ufunc->nin, nout = ufunc->nout, nop = ufunc->nargs;

    /* All following variables are cleared in the `fail` error path */
    ufunc_full_args full_args;
    PyArrayObject *wheremask = NULL;

    PyArray_DTypeMeta *signature[NPY_MAXARGS];
    PyArrayObject *operands[NPY_MAXARGS];
    PyArray_DTypeMeta *operand_DTypes[NPY_MAXARGS];
    PyArray_Descr *operation_descrs[NPY_MAXARGS];
    PyObject *output_array_prepare[NPY_MAXARGS];
    /* Initialize all arrays (we usually only need a small part) */
    memset(signature, 0, nop * sizeof(*signature));
    memset(operands, 0, nop * sizeof(*operands));
    memset(operand_DTypes, 0, nop * sizeof(*operand_DTypes));
    memset(operation_descrs, 0, nop * sizeof(*operation_descrs));
    memset(output_array_prepare, 0, nout * sizeof(*output_array_prepare));

    /*
     * Note that the input (and possibly output) arguments are passed in as
     * positional arguments. We extract these first and check for `out`
     * passed by keyword later.
     * Outputs and inputs are stored in `full_args.in` and `full_args.out`
     * as tuples (or NULL when no outputs are passed).
     */

    /* Check number of arguments */
    if (NPY_UNLIKELY((len_args < nin) || (len_args > nop))) {
        PyErr_Format(PyExc_TypeError,
                "%s() takes from %d to %d positional arguments but "
                "%zd were given",
                ufunc_get_name_cstr(ufunc), nin, nop, len_args);
        return NULL;
    }

    /* Fetch input arguments. */
    full_args.in = PyArray_TupleFromItems(ufunc->nin, args, 0);
    if (full_args.in == NULL) {
        return NULL;
    }

    /*
     * If there are more arguments, they define the out args. Otherwise
     * full_args.out is NULL for now, and the `out` kwarg may still be passed.
     */
    npy_bool out_is_passed_by_position = len_args > nin;
    if (out_is_passed_by_position) {
        npy_bool all_none = NPY_TRUE;

        full_args.out = PyTuple_New(nout);
        if (full_args.out == NULL) {
            goto fail;
        }
        for (int i = nin; i < nop; i++) {
            PyObject *tmp;
            if (i < (int)len_args) {
                tmp = args[i];
                if (tmp != Py_None) {
                    all_none = NPY_FALSE;
                }
            }
            else {
                tmp = Py_None;
            }
            Py_INCREF(tmp);
            PyTuple_SET_ITEM(full_args.out, i-nin, tmp);
        }
        if (all_none) {
            Py_SETREF(full_args.out, NULL);
        }
    }
    else {
        full_args.out = NULL;
    }

    /*
     * We have now extracted (but not converted) the input arguments.
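     * For example (illustrative): `np.add(a, b)` and `np.add(a, b, None)`
     * both leave `full_args.out` as NULL here, while `np.add(a, b, out_arr)`
     * stores `(out_arr,)` in it.
     *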
* To simplify overrides, extract all other arguments (as objects only) */ PyObject *out_obj = NULL, *where_obj = NULL; PyObject *axes_obj = NULL, *axis_obj = NULL; PyObject *keepdims_obj = NULL, *casting_obj = NULL, *order_obj = NULL; PyObject *subok_obj = NULL, *signature_obj = NULL, *sig_obj = NULL; PyObject *dtype_obj = NULL, *extobj = NULL; /* Skip parsing if there are no keyword arguments, nothing left to do */ if (kwnames != NULL) { if (!ufunc->core_enabled) { NPY_PREPARE_ARGPARSER; if (npy_parse_arguments(ufunc->name, args + len_args, 0, kwnames, "$out", NULL, &out_obj, "$where", NULL, &where_obj, "$casting", NULL, &casting_obj, "$order", NULL, &order_obj, "$subok", NULL, &subok_obj, "$dtype", NULL, &dtype_obj, "$signature", NULL, &signature_obj, "$sig", NULL, &sig_obj, "$extobj", NULL, &extobj, NULL, NULL, NULL) < 0) { goto fail; } } else { NPY_PREPARE_ARGPARSER; if (npy_parse_arguments(ufunc->name, args + len_args, 0, kwnames, "$out", NULL, &out_obj, "$axes", NULL, &axes_obj, "$axis", NULL, &axis_obj, "$keepdims", NULL, &keepdims_obj, "$casting", NULL, &casting_obj, "$order", NULL, &order_obj, "$subok", NULL, &subok_obj, "$dtype", NULL, &dtype_obj, "$signature", NULL, &signature_obj, "$sig", NULL, &sig_obj, "$extobj", NULL, &extobj, NULL, NULL, NULL) < 0) { goto fail; } if (NPY_UNLIKELY((axes_obj != NULL) && (axis_obj != NULL))) { PyErr_SetString(PyExc_TypeError, "cannot specify both 'axis' and 'axes'"); goto fail; } } /* Handle `out` arguments passed by keyword */ if (out_obj != NULL) { if (out_is_passed_by_position) { PyErr_SetString(PyExc_TypeError, "cannot specify 'out' as both a " "positional and keyword argument"); goto fail; } if (_set_full_args_out(nout, out_obj, &full_args) < 0) { goto fail; } } /* * Only one of signature, sig, and dtype should be passed. If `sig` * was passed, this puts it into `signature_obj` instead (these * are borrowed references). */ if (_check_and_copy_sig_to_signature( sig_obj, signature_obj, dtype_obj, &signature_obj) < 0) { goto fail; } } char *method; if (!outer) { method = "__call__"; } else { method = "outer"; } /* We now have all the information required to check for Overrides */ PyObject *override = NULL; errval = PyUFunc_CheckOverride(ufunc, method, full_args.in, full_args.out, where_obj, args, len_args, kwnames, &override); if (errval) { goto fail; } else if (override) { Py_DECREF(full_args.in); Py_XDECREF(full_args.out); return override; } if (outer) { /* Outer uses special preparation of inputs (expand dims) */ PyObject *new_in = prepare_input_arguments_for_outer(full_args.in, ufunc); if (new_in == NULL) { goto fail; } Py_SETREF(full_args.in, new_in); } /* * Parse the passed `dtype` or `signature` into an array containing * PyArray_DTypeMeta and/or None. 
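     * For example (illustrative): for a two-input, one-output ufunc,
     * `dtype=np.float64` fills only the output slot, giving
     * {NULL, NULL, Float64DType}, while `signature=("l", None, None)`
     * gives {LongDType, NULL, NULL}; the NULL entries are completed by
     * the promotion step below.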
     */
    if (_get_fixed_signature(ufunc,
            dtype_obj, signature_obj, signature) < 0) {
        goto fail;
    }

    NPY_ORDER order = NPY_KEEPORDER;
    NPY_CASTING casting = NPY_DEFAULT_ASSIGN_CASTING;
    npy_bool subok = NPY_TRUE;
    int keepdims = -1;  /* We need to know if it was passed */
    npy_bool force_legacy_promotion;
    npy_bool allow_legacy_promotion;
    npy_bool promoting_pyscalars;
    if (convert_ufunc_arguments(ufunc,
            /* extract operand related information: */
            full_args, operands,
            operand_DTypes,
            &force_legacy_promotion, &allow_legacy_promotion,
            &promoting_pyscalars,
            /* extract general information: */
            order_obj, &order,
            casting_obj, &casting,
            subok_obj, &subok,
            where_obj, &wheremask,
            keepdims_obj, &keepdims) < 0) {
        goto fail;
    }

    /*
     * Note that part of the promotion is to complete the signature
     * (until here it only represents the fixed part and is usually NULLs).
     *
     * After promotion, we could push the following logic into the
     * ArrayMethod in the future.  For now, we do it here.  The type
     * resolution step can be shared between the ufunc and gufunc code.
     */
    PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc,
            operands, signature,
            operand_DTypes, force_legacy_promotion, allow_legacy_promotion,
            promoting_pyscalars, NPY_FALSE);
    if (ufuncimpl == NULL) {
        goto fail;
    }

    /* Find the correct descriptors for the operation */
    if (resolve_descriptors(nop, ufunc, ufuncimpl,
            operands, operation_descrs, signature, casting) < 0) {
        goto fail;
    }

    if (promoting_pyscalars) {
        /*
         * Python integers need to be cast specially.  For other python
         * scalars it does not hurt either.  It would be nice to never create
         * the array in this case, but that is difficult until value-based
         * promotion rules are gone.  (After that, we may get away with using
         * dummy arrays rather than real arrays for the legacy resolvers.)
         */
        for (int i = 0; i < nin; i++) {
            int orig_flags = PyArray_FLAGS(operands[i]);
            if (!(orig_flags & NPY_ARRAY_WAS_PYTHON_LITERAL)) {
                continue;
            }
            /*
             * If descriptor matches, no need to convert, but integers may
             * have been too large.
             */
            if (!(orig_flags & NPY_ARRAY_WAS_INT_AND_REPLACED)
                    && PyArray_EquivTypes(
                        PyArray_DESCR(operands[i]), operation_descrs[i])) {
                continue;
            }
            /* Otherwise, replace the operand with a new array */
            PyArray_Descr *descr = operation_descrs[i];
            Py_INCREF(descr);
            PyArrayObject *new = (PyArrayObject *)PyArray_NewFromDescr(
                    &PyArray_Type, descr, 0, NULL, NULL, NULL, 0, NULL);
            Py_SETREF(operands[i], new);
            if (operands[i] == NULL) {
                goto fail;
            }

            PyObject *value = PyTuple_GET_ITEM(full_args.in, i);
            if (PyArray_SETITEM(new, PyArray_BYTES(operands[i]), value) < 0) {
                goto fail;
            }
        }
    }

    if (subok) {
        _find_array_prepare(full_args, output_array_prepare, nout);
    }

    /*
     * Do the final preparations and call the inner-loop.
     */
    if (!ufunc->core_enabled) {
        errval = PyUFunc_GenericFunctionInternal(ufunc, ufuncimpl,
                operation_descrs, operands, extobj, casting, order,
                output_array_prepare, full_args,  /* for __array_prepare__ */
                wheremask);
    }
    else {
        errval = PyUFunc_GeneralizedFunctionInternal(ufunc, ufuncimpl,
                operation_descrs, operands, extobj, casting, order,
                /* GUFuncs never (ever) called __array_prepare__! */
                axis_obj, axes_obj, keepdims);
    }

    if (errval < 0) {
        goto fail;
    }

    /*
     * Clear all variables which are not needed any further.
     * (From here on, we cannot `goto fail` any more.)
     */
    Py_XDECREF(wheremask);
    for (int i = 0; i < nop; i++) {
        Py_DECREF(signature[i]);
        Py_XDECREF(operand_DTypes[i]);
        Py_DECREF(operation_descrs[i]);
        if (i < nin) {
            Py_DECREF(operands[i]);
        }
        else {
            Py_XDECREF(output_array_prepare[i-nin]);
        }
    }
    /* The following steals the references to the outputs: */
    PyObject *result = replace_with_wrapped_result_and_return(ufunc,
            full_args, subok, operands+nin);
    Py_XDECREF(full_args.in);
    Py_XDECREF(full_args.out);

    return result;

fail:
    Py_XDECREF(full_args.in);
    Py_XDECREF(full_args.out);
    Py_XDECREF(wheremask);
    for (int i = 0; i < ufunc->nargs; i++) {
        Py_XDECREF(operands[i]);
        Py_XDECREF(signature[i]);
        Py_XDECREF(operand_DTypes[i]);
        Py_XDECREF(operation_descrs[i]);
        if (i < nout) {
            Py_XDECREF(output_array_prepare[i]);
        }
    }
    return NULL;
}


/*
 * Implement vectorcallfunc which should be defined with Python 3.8+.
 * In principle this could be backported, but the speed gain seems moderate
 * since ufunc calls often do not have keyword arguments and always have
 * a large overhead.  The only potential user would probably be Cython.
 */
static PyObject *
ufunc_generic_vectorcall(PyObject *ufunc,
        PyObject *const *args, size_t len_args, PyObject *kwnames)
{
    /*
     * Unlike METH_FASTCALL, `len_args` may have a flag to signal that
     * args[-1] may be (temporarily) used.  So normalize it here.
     */
    return ufunc_generic_fastcall((PyUFuncObject *)ufunc,
            args, PyVectorcall_NARGS(len_args), kwnames, NPY_FALSE);
}


NPY_NO_EXPORT PyObject *
ufunc_geterr(PyObject *NPY_UNUSED(dummy), PyObject *NPY_UNUSED(arg))
{
    PyObject *thedict;
    PyObject *res;

    thedict = PyThreadState_GetDict();
    if (thedict == NULL) {
        thedict = PyEval_GetBuiltins();
    }
    res = PyDict_GetItemWithError(thedict, npy_um_str_pyvals_name);
    if (res == NULL && PyErr_Occurred()) {
        return NULL;
    }
    else if (res != NULL) {
        Py_INCREF(res);
        return res;
    }
    /* Construct list of defaults */
    res = PyList_New(3);
    if (res == NULL) {
        return NULL;
    }
    PyList_SET_ITEM(res, 0, PyLong_FromLong(NPY_BUFSIZE));
    PyList_SET_ITEM(res, 1, PyLong_FromLong(UFUNC_ERR_DEFAULT));
    PyList_SET_ITEM(res, 2, Py_None);
    Py_INCREF(Py_None);
    return res;
}


NPY_NO_EXPORT PyObject *
ufunc_seterr(PyObject *NPY_UNUSED(dummy), PyObject *arg)
{
    PyObject *thedict;
    int res;
    PyObject *val = arg;
    static char *msg = "Error object must be a list of length 3";

    if (!PyList_CheckExact(val) || PyList_GET_SIZE(val) != 3) {
        PyErr_SetString(PyExc_ValueError, msg);
        return NULL;
    }
    thedict = PyThreadState_GetDict();
    if (thedict == NULL) {
        thedict = PyEval_GetBuiltins();
    }
    res = PyDict_SetItem(thedict, npy_um_str_pyvals_name, val);
    if (res < 0) {
        return NULL;
    }
#if USE_USE_DEFAULTS==1
    if (ufunc_update_use_defaults() < 0) {
        return NULL;
    }
#endif
    Py_RETURN_NONE;
}


/*UFUNC_API*/
NPY_NO_EXPORT int
PyUFunc_ReplaceLoopBySignature(PyUFuncObject *func,
        PyUFuncGenericFunction newfunc, const int *signature,
        PyUFuncGenericFunction *oldfunc)
{
    int i, j;
    int res = -1;
    /* Find the location of the matching signature */
    for (i = 0; i < func->ntypes; i++) {
        for (j = 0; j < func->nargs; j++) {
            if (signature[j] != func->types[i*func->nargs+j]) {
                break;
            }
        }
        if (j < func->nargs) {
            continue;
        }
        if (oldfunc != NULL) {
            *oldfunc = func->functions[i];
        }
        func->functions[i] = newfunc;
        res = 0;
        break;
    }
    return res;
}

/*UFUNC_API*/
NPY_NO_EXPORT PyObject *
PyUFunc_FromFuncAndData(PyUFuncGenericFunction *func, void **data,
        char *types, int ntypes,
        int nin, int nout, int identity,
        const char *name, const char *doc, int unused)
{
    return PyUFunc_FromFuncAndDataAndSignature(func, data, types, ntypes,
            nin, nout, identity, name, doc, unused, NULL);
}

/*UFUNC_API*/
NPY_NO_EXPORT PyObject *
PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data,
        char *types, int ntypes,
        int nin, int nout, int identity,
        const char *name, const char *doc,
        int unused, const char *signature)
{
    return PyUFunc_FromFuncAndDataAndSignatureAndIdentity(
            func, data, types, ntypes, nin, nout, identity, name, doc,
            unused, signature, NULL);
}

/*UFUNC_API*/
NPY_NO_EXPORT PyObject *
PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func,
        void **data, char *types, int ntypes, int nin, int nout,
        int identity, const char *name, const char *doc,
        const int unused, const char *signature,
        PyObject *identity_value)
{
    PyUFuncObject *ufunc;

    if (nin + nout > NPY_MAXARGS) {
        PyErr_Format(PyExc_ValueError,
                "Cannot construct a ufunc with more than %d operands "
                "(requested number was: inputs = %d and outputs = %d)",
                NPY_MAXARGS, nin, nout);
        return NULL;
    }

    ufunc = PyObject_GC_New(PyUFuncObject, &PyUFunc_Type);
    /*
     * We use GC_New here for ufunc->obj, but do not use GC_Track since
     * ufunc->obj is still NULL at the end of this function.
     * See ufunc_frompyfunc where ufunc->obj is set and GC_Track is called.
     */
    if (ufunc == NULL) {
        return NULL;
    }

    ufunc->nin = nin;
    ufunc->nout = nout;
    ufunc->nargs = nin+nout;
    ufunc->identity = identity;
    if (ufunc->identity == PyUFunc_IdentityValue) {
        Py_INCREF(identity_value);
        ufunc->identity_value = identity_value;
    }
    else {
        ufunc->identity_value = NULL;
    }

    ufunc->functions = func;
    ufunc->data = data;
    ufunc->types = types;
    ufunc->ntypes = ntypes;
    ufunc->core_signature = NULL;
    ufunc->core_enabled = 0;
    ufunc->obj = NULL;
    ufunc->core_num_dims = NULL;
    ufunc->core_num_dim_ix = 0;
    ufunc->core_offsets = NULL;
    ufunc->core_dim_ixs = NULL;
    ufunc->core_dim_sizes = NULL;
    ufunc->core_dim_flags = NULL;
    ufunc->userloops = NULL;
    ufunc->ptr = NULL;
    ufunc->vectorcall = &ufunc_generic_vectorcall;
    ufunc->reserved1 = 0;
    ufunc->iter_flags = 0;

    /* Type resolution and inner loop selection functions */
    ufunc->type_resolver = &PyUFunc_DefaultTypeResolver;
    ufunc->legacy_inner_loop_selector = &PyUFunc_DefaultLegacyInnerLoopSelector;
    ufunc->_always_null_previously_masked_innerloop_selector = NULL;

    ufunc->op_flags = NULL;
    ufunc->_loops = NULL;
    if (nin + nout != 0) {
        ufunc->_dispatch_cache = PyArrayIdentityHash_New(nin + nout);
        if (ufunc->_dispatch_cache == NULL) {
            Py_DECREF(ufunc);
            return NULL;
        }
    }
    else {
        /*
         * Work around a test that seems to do this right now; it should not
         * be a valid ufunc at all, though.  TODO: Remove...
         */
        ufunc->_dispatch_cache = NULL;
    }
    ufunc->_loops = PyList_New(0);
    if (ufunc->_loops == NULL) {
        Py_DECREF(ufunc);
        return NULL;
    }

    if (name == NULL) {
        ufunc->name = "?";
    }
    else {
        ufunc->name = name;
    }
    ufunc->doc = doc;

    ufunc->op_flags = PyArray_malloc(sizeof(npy_uint32)*ufunc->nargs);
    if (ufunc->op_flags == NULL) {
        Py_DECREF(ufunc);
        return PyErr_NoMemory();
    }
    memset(ufunc->op_flags, 0, sizeof(npy_uint32)*ufunc->nargs);

    if (signature != NULL) {
        if (_parse_signature(ufunc, signature) != 0) {
            Py_DECREF(ufunc);
            return NULL;
        }
    }

    char *curr_types = ufunc->types;
    for (int i = 0; i < ntypes * (nin + nout); i += nin + nout) {
        /*
         * Add all legacy wrapping loops here.  This is normally not
         * necessary, but makes sense.  It could also help/be needed to
         * avoid issues with ambiguous loops such as: `OO->?` and `OO->O`
         * where in theory the wrong loop could be picked if only the second
         * one is added.
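         * As an illustration (hedged): for a `types` array equivalent to
         * the two type tuples of "ff->f" and "dd->d", the loop below first
         * registers a wrapping ArrayMethod for (Float32, Float32, Float32)
         * and then one for (Float64, Float64, Float64), one per legacy
         * type tuple.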
         */
        PyObject *info;
        PyArray_DTypeMeta *op_dtypes[NPY_MAXARGS];
        for (int arg = 0; arg < nin + nout; arg++) {
            op_dtypes[arg] = PyArray_DTypeFromTypeNum(curr_types[arg]);
            /* These DTypes are immortal and adding INCREFs: so borrow it */
            Py_DECREF(op_dtypes[arg]);
        }
        curr_types += nin + nout;

        info = add_and_return_legacy_wrapping_ufunc_loop(ufunc, op_dtypes, 1);
        if (info == NULL) {
            Py_DECREF(ufunc);
            return NULL;
        }
    }
    /*
     * TODO: I tried adding a default promoter here (either all object for
     *       some special cases, or all homogeneous).  Those are reasonable
     *       defaults, but short-cut a deprecated SciPy loop, where the
     *       homogeneous loop `ddd->d` was deprecated, but an inhomogeneous
     *       one `dld->d` should be picked.
     *       The default promoter *is* a reasonable default, but adding it
     *       switched that behaviour.
     *       Another problem appeared due to buggy type-resolution for
     *       datetimes; this meant that `timedelta.sum(dtype="f8")` returned
     *       datetimes (and not floats or an error), arguably wrong, but...
     */
    return (PyObject *)ufunc;
}


/*UFUNC_API*/
NPY_NO_EXPORT int
PyUFunc_SetUsesArraysAsData(void **NPY_UNUSED(data), size_t NPY_UNUSED(i))
{
    /* NumPy 1.21, 2021-03-29 */
    PyErr_SetString(PyExc_RuntimeError,
            "PyUFunc_SetUsesArraysAsData() C-API function has been "
            "disabled. It was initially deprecated in NumPy 1.19.");
    return -1;
}


/*
 * This is the first-part of the CObject structure.
 *
 * I don't think this will change, but if it should, then
 * this needs to be fixed.  The exposed C-API was insufficient
 * because I needed to replace the pointer and it wouldn't
 * let me with a destructor set (even though it works fine
 * with the destructor).
 */
typedef struct {
    PyObject_HEAD
    void *c_obj;
} _simple_cobj;

#define _SETCPTR(cobj, val) ((_simple_cobj *)(cobj))->c_obj = (val)

/* return 1 if arg1 > arg2, 0 if arg1 == arg2, and -1 if arg1 < arg2 */
static int
cmp_arg_types(int *arg1, int *arg2, int n)
{
    for (; n > 0; n--, arg1++, arg2++) {
        if (PyArray_EquivTypenums(*arg1, *arg2)) {
            continue;
        }
        if (PyArray_CanCastSafely(*arg1, *arg2)) {
            return -1;
        }
        return 1;
    }
    return 0;
}

/*
 * This frees the linked-list structure when the CObject
 * is destroyed (removed from the internal dictionary)
 */
static inline void
_free_loop1d_list(PyUFunc_Loop1d *data)
{
    int i;

    while (data != NULL) {
        PyUFunc_Loop1d *next = data->next;
        PyArray_free(data->arg_types);

        if (data->arg_dtypes != NULL) {
            for (i = 0; i < data->nargs; i++) {
                Py_DECREF(data->arg_dtypes[i]);
            }
            PyArray_free(data->arg_dtypes);
        }
        PyArray_free(data);
        data = next;
    }
}

static void
_loop1d_list_free(PyObject *ptr)
{
    PyUFunc_Loop1d *data = (PyUFunc_Loop1d *)PyCapsule_GetPointer(ptr, NULL);
    _free_loop1d_list(data);
}


/*
 * This function allows the user to register a 1-d loop with an already
 * created ufunc.  This function is similar to RegisterLoopForType except
 * that it allows a 1-d loop to be registered with PyArray_Descr objects
 * instead of dtype type num values.  This allows a 1-d loop to be
 * registered for a structured array dtype or a custom dtype.  The ufunc
 * is called whenever any of its input arguments match the user_dtype
 * argument.
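 *
 * A usage sketch (hypothetical `rational` user dtype and names, error
 * handling omitted; illustrative only):
 *
 *     static void
 *     rational_add_loop(char **args, npy_intp const *dimensions,
 *                       npy_intp const *steps, void *data);
 *
 *     PyArray_Descr *dtypes[3] = {
 *             rational_descr, rational_descr, rational_descr};
 *     PyUFunc_RegisterLoopForDescr((PyUFuncObject *)add_ufunc,
 *             rational_descr, &rational_add_loop, dtypes, NULL);
 *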
* * ufunc - ufunc object created from call to PyUFunc_FromFuncAndData * user_dtype - dtype that ufunc will be registered with * function - 1-d loop function pointer * arg_dtypes - array of dtype objects describing the ufunc operands * data - arbitrary data pointer passed in to loop function * * returns 0 on success, -1 for failure */ /*UFUNC_API*/ NPY_NO_EXPORT int PyUFunc_RegisterLoopForDescr(PyUFuncObject *ufunc, PyArray_Descr *user_dtype, PyUFuncGenericFunction function, PyArray_Descr **arg_dtypes, void *data) { int i; int result = 0; int *arg_typenums; PyObject *key, *cobj; if (user_dtype == NULL) { PyErr_SetString(PyExc_TypeError, "unknown user defined struct dtype"); return -1; } key = PyLong_FromLong((long) user_dtype->type_num); if (key == NULL) { return -1; } arg_typenums = PyArray_malloc(ufunc->nargs * sizeof(int)); if (arg_typenums == NULL) { Py_DECREF(key); PyErr_NoMemory(); return -1; } if (arg_dtypes != NULL) { for (i = 0; i < ufunc->nargs; i++) { arg_typenums[i] = arg_dtypes[i]->type_num; } } else { for (i = 0; i < ufunc->nargs; i++) { arg_typenums[i] = user_dtype->type_num; } } result = PyUFunc_RegisterLoopForType(ufunc, user_dtype->type_num, function, arg_typenums, data); if (result == 0) { cobj = PyDict_GetItemWithError(ufunc->userloops, key); if (cobj == NULL && PyErr_Occurred()) { result = -1; } else if (cobj == NULL) { PyErr_SetString(PyExc_KeyError, "userloop for user dtype not found"); result = -1; } else { int cmp = 1; PyUFunc_Loop1d *current = PyCapsule_GetPointer(cobj, NULL); if (current == NULL) { result = -1; goto done; } while (current != NULL) { cmp = cmp_arg_types(current->arg_types, arg_typenums, ufunc->nargs); if (cmp >= 0 && current->arg_dtypes == NULL) { break; } current = current->next; } if (cmp == 0 && current != NULL && current->arg_dtypes == NULL) { current->arg_dtypes = PyArray_malloc(ufunc->nargs * sizeof(PyArray_Descr*)); if (current->arg_dtypes == NULL) { PyErr_NoMemory(); result = -1; goto done; } else if (arg_dtypes != NULL) { for (i = 0; i < ufunc->nargs; i++) { current->arg_dtypes[i] = arg_dtypes[i]; Py_INCREF(current->arg_dtypes[i]); } } else { for (i = 0; i < ufunc->nargs; i++) { current->arg_dtypes[i] = user_dtype; Py_INCREF(current->arg_dtypes[i]); } } current->nargs = ufunc->nargs; } else { PyErr_SetString(PyExc_RuntimeError, "loop already registered"); result = -1; } } } done: PyArray_free(arg_typenums); Py_DECREF(key); return result; } /*UFUNC_API*/ NPY_NO_EXPORT int PyUFunc_RegisterLoopForType(PyUFuncObject *ufunc, int usertype, PyUFuncGenericFunction function, const int *arg_types, void *data) { PyArray_Descr *descr; PyUFunc_Loop1d *funcdata; PyObject *key, *cobj; PyArray_DTypeMeta *signature[NPY_MAXARGS]; PyObject *signature_tuple = NULL; int i; int *newtypes=NULL; descr=PyArray_DescrFromType(usertype); if ((usertype < NPY_USERDEF && usertype != NPY_VOID) || (descr==NULL)) { PyErr_SetString(PyExc_TypeError, "unknown user-defined type"); return -1; } Py_DECREF(descr); if (ufunc->userloops == NULL) { ufunc->userloops = PyDict_New(); } key = PyLong_FromLong((long) usertype); if (key == NULL) { return -1; } funcdata = PyArray_malloc(sizeof(PyUFunc_Loop1d)); if (funcdata == NULL) { goto fail; } newtypes = PyArray_malloc(sizeof(int)*ufunc->nargs); if (newtypes == NULL) { goto fail; } if (arg_types != NULL) { for (i = 0; i < ufunc->nargs; i++) { newtypes[i] = arg_types[i]; signature[i] = PyArray_DTypeFromTypeNum(arg_types[i]); Py_DECREF(signature[i]); /* DType can't be deleted... 
 */
        }
    }
    else {
        for (i = 0; i < ufunc->nargs; i++) {
            newtypes[i] = usertype;
            signature[i] = PyArray_DTypeFromTypeNum(usertype);
            Py_DECREF(signature[i]);  /* DType can't be deleted... */
        }
    }

    signature_tuple = PyArray_TupleFromItems(
            ufunc->nargs, (PyObject **)signature, 0);
    if (signature_tuple == NULL) {
        goto fail;
    }
    /*
     * We add the loop to the list of all loops and promoters.  If the
     * equivalent loop was already added, skip this.
     * Note that even then the ufunc is still modified: The legacy
     * ArrayMethod already looks up the inner-loop from the ufunc (and this
     * is replaced below!).
     * If the existing one is not a legacy ArrayMethod, we currently raise:
     * A new-style loop should not be replaced by an old-style one.
     */
    int add_new_loop = 1;
    for (Py_ssize_t j = 0; j < PyList_GET_SIZE(ufunc->_loops); j++) {
        PyObject *item = PyList_GET_ITEM(ufunc->_loops, j);
        PyObject *existing_tuple = PyTuple_GET_ITEM(item, 0);

        int cmp = PyObject_RichCompareBool(existing_tuple, signature_tuple, Py_EQ);
        if (cmp < 0) {
            goto fail;
        }
        if (!cmp) {
            continue;
        }
        PyObject *registered = PyTuple_GET_ITEM(item, 1);
        if (!PyObject_TypeCheck(registered, &PyArrayMethod_Type) || (
                (PyArrayMethodObject *)registered)->get_strided_loop !=
                        &get_wrapped_legacy_ufunc_loop) {
            PyErr_Format(PyExc_TypeError,
                    "A non-compatible loop was already registered for "
                    "ufunc %s and DTypes %S.",
                    ufunc_get_name_cstr(ufunc), signature_tuple);
            goto fail;
        }
        /* The loop was already added */
        add_new_loop = 0;
        break;
    }
    if (add_new_loop) {
        PyObject *info = add_and_return_legacy_wrapping_ufunc_loop(
                ufunc, signature, 0);
        if (info == NULL) {
            goto fail;
        }
    }
    /* Clearing sets it to NULL for the error paths */
    Py_CLEAR(signature_tuple);

    funcdata->func = function;
    funcdata->arg_types = newtypes;
    funcdata->data = data;
    funcdata->next = NULL;
    funcdata->arg_dtypes = NULL;
    funcdata->nargs = 0;

    /* Get entry for this user-defined type */
    cobj = PyDict_GetItemWithError(ufunc->userloops, key);
    if (cobj == NULL && PyErr_Occurred()) {
        goto fail;
    }
    /* If it's not there, then make one and return. */
    else if (cobj == NULL) {
        cobj = PyCapsule_New((void *)funcdata, NULL, _loop1d_list_free);
        if (cobj == NULL) {
            goto fail;
        }
        PyDict_SetItem(ufunc->userloops, key, cobj);
        Py_DECREF(cobj);
        Py_DECREF(key);
        return 0;
    }
    else {
        PyUFunc_Loop1d *current, *prev = NULL;
        int cmp = 1;
        /*
         * There is already at least 1 loop.  Place this one in
         * lexicographic order.  If the signature of the next one
         * is exactly like this one, then just replace.
         * Otherwise insert.
         */
        current = PyCapsule_GetPointer(cobj, NULL);
        if (current == NULL) {
            goto fail;
        }
        while (current != NULL) {
            cmp = cmp_arg_types(current->arg_types, newtypes, ufunc->nargs);
            if (cmp >= 0) {
                break;
            }
            prev = current;
            current = current->next;
        }
        if (cmp == 0) {
            /* just replace it with new function */
            current->func = function;
            current->data = data;
            PyArray_free(newtypes);
            PyArray_free(funcdata);
        }
        else {
            /*
             * insert it before the current one by hacking the internals
             * of cobject to replace the function pointer --- can't use
             * CObject API because destructor is set.
             */
            funcdata->next = current;
            if (prev == NULL) {
                /* place this at front */
                _SETCPTR(cobj, funcdata);
            }
            else {
                prev->next = funcdata;
            }
        }
    }
    Py_DECREF(key);
    return 0;

fail:
    Py_DECREF(key);
    Py_XDECREF(signature_tuple);
    PyArray_free(funcdata);
    PyArray_free(newtypes);
    if (!PyErr_Occurred()) {
        PyErr_NoMemory();
    }
    return -1;
}

#undef _SETCPTR


static void
ufunc_dealloc(PyUFuncObject *ufunc)
{
    PyObject_GC_UnTrack((PyObject *)ufunc);
    PyArray_free(ufunc->core_num_dims);
    PyArray_free(ufunc->core_dim_ixs);
    PyArray_free(ufunc->core_dim_sizes);
    PyArray_free(ufunc->core_dim_flags);
    PyArray_free(ufunc->core_offsets);
    PyArray_free(ufunc->core_signature);
    PyArray_free(ufunc->ptr);
    PyArray_free(ufunc->op_flags);
    Py_XDECREF(ufunc->userloops);
    if (ufunc->identity == PyUFunc_IdentityValue) {
        Py_DECREF(ufunc->identity_value);
    }
    Py_XDECREF(ufunc->obj);
    Py_XDECREF(ufunc->_loops);
    if (ufunc->_dispatch_cache != NULL) {
        PyArrayIdentityHash_Dealloc(ufunc->_dispatch_cache);
    }
    PyObject_GC_Del(ufunc);
}

static PyObject *
ufunc_repr(PyUFuncObject *ufunc)
{
    return PyUnicode_FromFormat("<ufunc '%s'>", ufunc->name);
}

static int
ufunc_traverse(PyUFuncObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->obj);
    if (self->identity == PyUFunc_IdentityValue) {
        Py_VISIT(self->identity_value);
    }
    return 0;
}

/******************************************************************************
 ***                          UFUNC METHODS                                 ***
 *****************************************************************************/

/*
 * op.outer(a,b) is equivalent to op(a[:,NewAxis,NewAxis,etc.],b)
 * where a has b.ndim NewAxis terms appended.
 *
 * The result has dimensions a.ndim + b.ndim
 */
static PyObject *
ufunc_outer(PyUFuncObject *ufunc,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
    if (ufunc->core_enabled) {
        PyErr_Format(PyExc_TypeError,
                "method outer is not allowed in ufunc with non-trivial"
                " signature");
        return NULL;
    }

    if (ufunc->nin != 2) {
        PyErr_SetString(PyExc_ValueError,
                "outer product only supported "
                "for binary functions");
        return NULL;
    }

    if (len_args != 2) {
        PyErr_SetString(PyExc_TypeError, "exactly two arguments expected");
        return NULL;
    }

    return ufunc_generic_fastcall(ufunc, args, len_args, kwnames, NPY_TRUE);
}


static PyObject *
prepare_input_arguments_for_outer(PyObject *args, PyUFuncObject *ufunc)
{
    PyArrayObject *ap1 = NULL;
    PyObject *tmp;
    static PyObject *_numpy_matrix;
    npy_cache_import("numpy", "matrix", &_numpy_matrix);

    const char *matrix_deprecation_msg = (
            "%s.outer() was passed a numpy matrix as %s argument. "
            "Special handling of matrix is deprecated and will result in an "
            "error in most cases. Please convert the matrix to a NumPy "
            "array to retain the old behaviour. 
You can use `matrix.A` " "to achieve this."); tmp = PyTuple_GET_ITEM(args, 0); if (PyObject_IsInstance(tmp, _numpy_matrix)) { /* DEPRECATED 2020-05-13, NumPy 1.20 */ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, matrix_deprecation_msg, ufunc->name, "first") < 0) { return NULL; } ap1 = (PyArrayObject *) PyArray_FromObject(tmp, NPY_NOTYPE, 0, 0); } else { ap1 = (PyArrayObject *) PyArray_FROM_O(tmp); } if (ap1 == NULL) { return NULL; } PyArrayObject *ap2 = NULL; tmp = PyTuple_GET_ITEM(args, 1); if (PyObject_IsInstance(tmp, _numpy_matrix)) { /* DEPRECATED 2020-05-13, NumPy 1.20 */ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, matrix_deprecation_msg, ufunc->name, "second") < 0) { Py_DECREF(ap1); return NULL; } ap2 = (PyArrayObject *) PyArray_FromObject(tmp, NPY_NOTYPE, 0, 0); } else { ap2 = (PyArrayObject *) PyArray_FROM_O(tmp); } if (ap2 == NULL) { Py_DECREF(ap1); return NULL; } /* Construct new shape from ap1 and ap2 and then reshape */ PyArray_Dims newdims; npy_intp newshape[NPY_MAXDIMS]; newdims.len = PyArray_NDIM(ap1) + PyArray_NDIM(ap2); newdims.ptr = newshape; if (newdims.len > NPY_MAXDIMS) { PyErr_Format(PyExc_ValueError, "maximum supported dimension for an ndarray is %d, but " "`%s.outer()` result would have %d.", NPY_MAXDIMS, ufunc->name, newdims.len); goto fail; } if (newdims.ptr == NULL) { goto fail; } memcpy(newshape, PyArray_DIMS(ap1), PyArray_NDIM(ap1) * sizeof(npy_intp)); for (int i = PyArray_NDIM(ap1); i < newdims.len; i++) { newshape[i] = 1; } PyArrayObject *ap_new; ap_new = (PyArrayObject *)PyArray_Newshape(ap1, &newdims, NPY_CORDER); if (ap_new == NULL) { goto fail; } if (PyArray_NDIM(ap_new) != newdims.len || !PyArray_CompareLists(PyArray_DIMS(ap_new), newshape, newdims.len)) { PyErr_Format(PyExc_TypeError, "%s.outer() called with ndarray-subclass of type '%s' " "which modified its shape after a reshape. `outer()` relies " "on reshaping the inputs and is for example not supported for " "the 'np.matrix' class (the usage of matrix is generally " "discouraged). 
" "To work around this issue, please convert the inputs to " "numpy arrays.", ufunc->name, Py_TYPE(ap_new)->tp_name); Py_DECREF(ap_new); goto fail; } Py_DECREF(ap1); return Py_BuildValue("(NN)", ap_new, ap2); fail: Py_XDECREF(ap1); Py_XDECREF(ap2); return NULL; } static PyObject * ufunc_reduce(PyUFuncObject *ufunc, PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) { return PyUFunc_GenericReduction( ufunc, args, len_args, kwnames, UFUNC_REDUCE); } static PyObject * ufunc_accumulate(PyUFuncObject *ufunc, PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) { return PyUFunc_GenericReduction( ufunc, args, len_args, kwnames, UFUNC_ACCUMULATE); } static PyObject * ufunc_reduceat(PyUFuncObject *ufunc, PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) { return PyUFunc_GenericReduction( ufunc, args, len_args, kwnames, UFUNC_REDUCEAT); } /* Helper for ufunc_at, below */ static inline PyArrayObject * new_array_op(PyArrayObject *op_array, char *data) { npy_intp dims[1] = {1}; Py_INCREF(PyArray_DESCR(op_array)); /* NewFromDescr steals a reference */ PyObject *r = PyArray_NewFromDescr(&PyArray_Type, PyArray_DESCR(op_array), 1, dims, NULL, data, NPY_ARRAY_WRITEABLE, NULL); return (PyArrayObject *)r; } /* * Use an indexed loop to do the work * Returns 0 if successful */ static int trivial_at_loop(PyArrayMethodObject *ufuncimpl, NPY_ARRAYMETHOD_FLAGS flags, PyArrayMapIterObject *iter, PyArrayObject *op1_array, PyArrayObject *op2_array, PyArrayMethod_Context *context) { int buffersize=0, errormask = 0; int res; char *args[3]; npy_intp steps[3]; args[0] = (char *) iter->baseoffset; steps[0] = iter->fancy_strides[0]; if (ufuncimpl->nin == 1) { args[2] = NULL; steps[2] = 0; } else { args[2] = (char *)PyArray_DATA(op2_array); if (PyArray_NDIM(op2_array) == 0 || PyArray_DIM(op2_array, 0) <= 1) { steps[2] = 0; } else { steps[2] = PyArray_STRIDE(op2_array, 0); } } npy_intp *inner_size = NpyIter_GetInnerLoopSizePtr(iter->outer); if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { npy_clear_floatstatus_barrier((char *)context); } do { args[1] = (char *) iter->outer_ptrs[0]; steps[1] = iter->outer_strides[0]; res = ufuncimpl->contiguous_indexed_loop( context, args, inner_size, steps, NULL); if (args[2] != NULL) { args[2] += (*inner_size) * steps[2]; } } while (res == 0 && iter->outer_next(iter->outer)); if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { const char * ufunc_name = ufunc_get_name_cstr((PyUFuncObject *)context->caller); if (_get_bufsize_errmask(NULL, ufunc_name, &buffersize, &errormask) < 0) { return -1; } res = _check_ufunc_fperr(errormask, NULL, ufunc_name); } return res; } static int ufunc_at__fast_iter(PyUFuncObject *ufunc, NPY_ARRAYMETHOD_FLAGS flags, PyArrayMapIterObject *iter, PyArrayIterObject *iter2, PyArrayObject *op1_array, PyArrayObject *op2_array, PyArrayMethod_StridedLoop *strided_loop, PyArrayMethod_Context *context, NpyAuxData *auxdata ) { int buffersize; int errormask = 0; int res = 0; NPY_BEGIN_THREADS_DEF; if (_get_bufsize_errmask(NULL, ufunc->name, &buffersize, &errormask) < 0) { return -1; } int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); } if (!needs_api) { NPY_BEGIN_THREADS; } npy_intp strides[3] = {0, 0, 0}; /* * Iterate over first and second operands and call ufunc * for each pair of inputs */ for (npy_intp i = iter->size; i > 0; i--) { char *dataptr[3]; /* one element at 
a time, no stride required but read by innerloop */ npy_intp count = 1; /* * Set up data pointers for either one or two input operands. * The output data pointer points to the first operand data. */ dataptr[0] = iter->dataptr; if (iter2 != NULL) { dataptr[1] = PyArray_ITER_DATA(iter2); dataptr[2] = iter->dataptr; } else { dataptr[1] = iter->dataptr; dataptr[2] = NULL; } res = strided_loop(context, dataptr, &count, strides, auxdata); if (res != 0) { break; } PyArray_MapIterNext(iter); if (iter2 != NULL) { PyArray_ITER_NEXT(iter2); } } NPY_END_THREADS; if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* NOTE: We could check float errors even when `res < 0` */ res = _check_ufunc_fperr(errormask, NULL, "at"); } return res; } static int ufunc_at__slow_iter(PyUFuncObject *ufunc, NPY_ARRAYMETHOD_FLAGS flags, PyArrayMapIterObject *iter, PyArrayIterObject *iter2, PyArrayObject *op1_array, PyArrayObject *op2_array, PyArray_Descr *operation_descrs[3], PyArrayMethod_StridedLoop *strided_loop, PyArrayMethod_Context *context, NpyAuxData *auxdata ) { NpyIter *iter_buffer = NULL; PyArrayObject *array_operands[3] = {NULL, NULL, NULL}; int buffersize; int errormask = 0; int res = 0; int nop = 0; NpyIter_IterNextFunc *iternext; char * err_msg = NULL; NPY_BEGIN_THREADS_DEF; if (_get_bufsize_errmask(NULL, ufunc->name, &buffersize, &errormask) < 0) { return -1; } array_operands[0] = new_array_op(op1_array, iter->dataptr); if (iter2 != NULL) { array_operands[1] = new_array_op(op2_array, PyArray_ITER_DATA(iter2)); array_operands[2] = new_array_op(op1_array, iter->dataptr); nop = 3; } else { array_operands[1] = new_array_op(op1_array, iter->dataptr); array_operands[2] = NULL; nop = 2; } /* Set up the flags */ npy_uint32 op_flags[3]; op_flags[0] = NPY_ITER_READONLY| NPY_ITER_ALIGNED; if (iter2 != NULL) { op_flags[1] = NPY_ITER_READONLY| NPY_ITER_ALIGNED; op_flags[2] = NPY_ITER_WRITEONLY| NPY_ITER_ALIGNED| NPY_ITER_ALLOCATE| NPY_ITER_NO_BROADCAST| NPY_ITER_NO_SUBTYPE; } else { op_flags[1] = NPY_ITER_WRITEONLY| NPY_ITER_ALIGNED| NPY_ITER_ALLOCATE| NPY_ITER_NO_BROADCAST| NPY_ITER_NO_SUBTYPE; } /* * Create NpyIter object to "iterate" over single element of each input * operand. This is an easy way to reuse the NpyIter logic for dealing * with certain cases like casting operands to correct dtype. On each * iteration over the MapIterArray object created above, we'll take the * current data pointers from that and reset this NpyIter object using * those data pointers, and then trigger a buffer copy. The buffer data * pointers from the NpyIter object will then be passed to the inner loop * function. 
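 * For example (illustrative): `np.add.at(a, idx, b)` with `a` of dtype
 * float32 and `b` of float64 typically resolves to the float64 loop, so
 * each visited index copies one element of each operand through the
 * buffers, and the write-back triggered by `iternext` casts the result
 * back to float32.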
*/ iter_buffer = NpyIter_AdvancedNew(nop, array_operands, NPY_ITER_EXTERNAL_LOOP| NPY_ITER_REFS_OK| NPY_ITER_ZEROSIZE_OK| NPY_ITER_BUFFERED| NPY_ITER_GROWINNER| NPY_ITER_DELAY_BUFALLOC, NPY_KEEPORDER, NPY_UNSAFE_CASTING, op_flags, operation_descrs, -1, NULL, NULL, buffersize); if (iter_buffer == NULL) { /* will fail only on memory allocation errors */ for (int i = 0; i < 3; i++) { Py_XDECREF(array_operands[i]); } return -1; } iternext = NpyIter_GetIterNext(iter_buffer, NULL); if (iternext == NULL) { /* can not really happen, iter_buffer creation is tightly controlled */ NpyIter_Deallocate(iter_buffer); for (int i = 0; i < 3; i++) { Py_XDECREF(array_operands[i]); } return -1; } int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; needs_api |= NpyIter_IterationNeedsAPI(iter_buffer); if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* Start with the floating-point exception flags cleared */ npy_clear_floatstatus_barrier((char*)&iter); } npy_intp strides[3] = {0, 0, 0}; if (!needs_api) { NPY_BEGIN_THREADS; } /* * Iterate over first and second operands and call ufunc * for each pair of inputs */ for (npy_intp i = iter->size; i > 0; i--) { char *dataptr[3]; char **buffer_dataptr; /* one element at a time, no stride required but read by innerloop */ npy_intp count = 1; /* * Set up data pointers for either one or two input operands. * The output data pointer points to the first operand data. */ dataptr[0] = iter->dataptr; if (iter2 != NULL) { dataptr[1] = PyArray_ITER_DATA(iter2); dataptr[2] = iter->dataptr; } else { dataptr[1] = iter->dataptr; dataptr[2] = NULL; } /* Reset NpyIter data pointers which will trigger a buffer copy */ NpyIter_ResetBasePointers(iter_buffer, dataptr, &err_msg); if (err_msg) { res = -1; break; } buffer_dataptr = NpyIter_GetDataPtrArray(iter_buffer); res = strided_loop(context, buffer_dataptr, &count, strides, auxdata); if (res != 0) { break; } /* * Call to iternext triggers copy from buffer back to output array * after innerloop puts result in buffer. */ iternext(iter_buffer); PyArray_MapIterNext(iter); if (iter2 != NULL) { PyArray_ITER_NEXT(iter2); } } NPY_END_THREADS; if (res != 0 && err_msg) { PyErr_SetString(PyExc_ValueError, err_msg); } if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* NOTE: We could check float errors even when `res < 0` */ res = _check_ufunc_fperr(errormask, NULL, "at"); } NpyIter_Deallocate(iter_buffer); for (int i = 0; i < 3; i++) { Py_XDECREF(array_operands[i]); } return res; } /* * Call ufunc only on selected array items and store result in first operand. * For add ufunc, method call is equivalent to op1[idx] += op2 with no * buffering of the first operand. * Arguments: * op1 - First operand to ufunc * idx - Indices that are applied to first operand. Equivalent to op1[idx]. * op2 - Second operand to ufunc (if needed). Must be able to broadcast * over first operand. 
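 *
 * For example (illustrative): `np.add.at(a, [0, 0, 1], 1)` increments
 * `a[0]` twice and `a[1]` once, unlike `a[[0, 0, 1]] += 1`, where the
 * buffered fancy-index assignment writes to `a[0]` only once.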
*/ static PyObject * ufunc_at(PyUFuncObject *ufunc, PyObject *args) { PyObject *op1 = NULL; PyObject *idx = NULL; PyObject *op2 = NULL; PyArrayObject *op1_array = NULL; PyArrayObject *op2_array = NULL; PyArrayMapIterObject *iter = NULL; PyArrayIterObject *iter2 = NULL; PyArray_Descr *operation_descrs[3] = {NULL, NULL, NULL}; int nop; /* override vars */ int errval; PyObject *override = NULL; int res = -1; /* start with fail condition so "goto fail" will error */ PyArrayMethod_StridedLoop *strided_loop; NpyAuxData *auxdata = NULL; if (ufunc->core_enabled) { PyErr_Format(PyExc_TypeError, "%s.at does not support ufunc with non-trivial signature: %s has signature %s.", ufunc->name, ufunc->name, ufunc->core_signature); return NULL; } if (ufunc->nin > 2) { PyErr_SetString(PyExc_ValueError, "Only unary and binary ufuncs supported at this time"); return NULL; } if (ufunc->nout != 1) { PyErr_SetString(PyExc_ValueError, "Only single output ufuncs supported at this time"); return NULL; } if (!PyArg_ParseTuple(args, "OO|O:at", &op1, &idx, &op2)) { return NULL; } if (ufunc->nin == 2 && op2 == NULL) { PyErr_SetString(PyExc_ValueError, "second operand needed for ufunc"); return NULL; } if (ufunc->nin == 1 && op2 != NULL) { PyErr_SetString(PyExc_ValueError, "second operand provided when ufunc is unary"); return NULL; } errval = PyUFunc_CheckOverride(ufunc, "at", args, NULL, NULL, NULL, 0, NULL, &override); if (errval) { return NULL; } else if (override) { return override; } if (!PyArray_Check(op1)) { PyErr_SetString(PyExc_TypeError, "first operand must be array"); return NULL; } op1_array = (PyArrayObject *)op1; /* Create second operand from number array if needed. */ if (op2 == NULL) { nop = 2; } else { nop = 3; op2_array = (PyArrayObject *)PyArray_FromAny(op2, NULL, 0, 0, 0, NULL); if (op2_array == NULL) { goto fail; } } PyArrayMethodObject *ufuncimpl = NULL; { /* Do all the dtype handling and find the correct ufuncimpl */ PyArrayObject *tmp_operands[3] = {NULL, NULL, NULL}; PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL}; PyArray_DTypeMeta *operand_DTypes[3] = {NULL, NULL, NULL}; /* * Create dtypes array for either one or two input operands. * Compare to the logic in `convert_ufunc_arguments`. * TODO: It may be good to review some of this behaviour, since the * operand array is special (it is written to) similar to reductions. * Using unsafe-casting as done here, is likely not desirable. 
         */
*/ tmp_operands[0] = op1_array; operand_DTypes[0] = NPY_DTYPE(PyArray_DESCR(op1_array)); Py_INCREF(operand_DTypes[0]); int force_legacy_promotion = 0; int allow_legacy_promotion = NPY_DT_is_legacy(operand_DTypes[0]); if (op2_array != NULL) { tmp_operands[1] = op2_array; operand_DTypes[1] = NPY_DTYPE(PyArray_DESCR(op2_array)); Py_INCREF(operand_DTypes[1]); allow_legacy_promotion &= NPY_DT_is_legacy(operand_DTypes[1]); tmp_operands[2] = tmp_operands[0]; operand_DTypes[2] = operand_DTypes[0]; Py_INCREF(operand_DTypes[2]); if (allow_legacy_promotion && ((PyArray_NDIM(op1_array) == 0) != (PyArray_NDIM(op2_array) == 0))) { /* both are legacy and only one is 0-D: force legacy */ force_legacy_promotion = should_use_min_scalar(2, tmp_operands, 0, NULL); } } else { tmp_operands[1] = tmp_operands[0]; operand_DTypes[1] = operand_DTypes[0]; Py_INCREF(operand_DTypes[1]); tmp_operands[2] = NULL; } ufuncimpl = promote_and_get_ufuncimpl(ufunc, tmp_operands, signature, operand_DTypes, force_legacy_promotion, allow_legacy_promotion, NPY_FALSE, NPY_FALSE); if (ufuncimpl == NULL) { for (int i = 0; i < 3; i++) { Py_XDECREF(signature[i]); Py_XDECREF(operand_DTypes[i]); } goto fail; } /* Find the correct operation_descrs for the operation */ int resolve_result = resolve_descriptors(nop, ufunc, ufuncimpl, tmp_operands, operation_descrs, signature, NPY_UNSAFE_CASTING); for (int i = 0; i < 3; i++) { Py_XDECREF(signature[i]); Py_XDECREF(operand_DTypes[i]); } if (resolve_result < 0) { goto fail; } } iter = (PyArrayMapIterObject *)PyArray_MapIterArrayCopyIfOverlap( op1_array, idx, 1, op2_array); if (iter == NULL) { goto fail; } op1_array = iter->array; /* May be updateifcopied on overlap */ if (op2_array != NULL) { /* * May need to swap axes so that second operand is * iterated over correctly */ if ((iter->subspace != NULL) && (iter->consec)) { PyArray_MapIterSwapAxes(iter, &op2_array, 0); if (op2_array == NULL) { /* only on memory allocation failure */ goto fail; } } /* * Create array iter object for second operand that * "matches" the map iter object for the first operand. * Then we can just iterate over the first and second * operands at the same time and not have to worry about * picking the correct elements from each operand to apply * the ufunc to. 
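         * For example (illustrative): for `np.add.at(a, ([0, 1, 2],), b)`
         * with `b` of shape (3,), `b` is broadcast to the iteration shape,
         * so that `iter2` yields exactly one element of `b` per visited
         * index of `a`.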
*/ if ((iter2 = (PyArrayIterObject *)\ PyArray_BroadcastToShape((PyObject *)op2_array, iter->dimensions, iter->nd))==NULL) { goto fail; } } PyArrayMethod_Context context = { .caller = (PyObject *)ufunc, .method = ufuncimpl, .descriptors = operation_descrs, }; /* Use contiguous strides; if there is such a loop it may be faster */ npy_intp strides[3] = { operation_descrs[0]->elsize, operation_descrs[1]->elsize, 0}; if (nop == 3) { strides[2] = operation_descrs[2]->elsize; } NPY_ARRAYMETHOD_FLAGS flags; if (ufuncimpl->get_strided_loop(&context, 1, 0, strides, &strided_loop, &auxdata, &flags) < 0) { goto fail; } int fast_path = 1; /* check no casting, alignment */ if (PyArray_DESCR(op1_array) != operation_descrs[0]) { fast_path = 0; } if (PyArray_DESCR(op1_array) != operation_descrs[nop - 1]) { /* output casting */ fast_path = 0; } if (!PyArray_ISALIGNED(op1_array)) { fast_path = 0; } if (nop >2) { if (PyArray_DESCR(op2_array) != operation_descrs[1]) { fast_path = 0; } if (!PyArray_ISALIGNED(op2_array)) { fast_path = 0; } } if (fast_path == 1) { /* * Try to use trivial loop (1d, no casting, aligned) if * - the matching info has a indexed loop * - idx must be exactly one integer index array * - all operands are 1d * A future enhancement could loosen the restriction on 1d operands * by adding an iteration loop inside trivial_at_loop */ if ((ufuncimpl->contiguous_indexed_loop != NULL) && (PyArray_NDIM(op1_array) == 1) && (op2_array == NULL || PyArray_NDIM(op2_array) <= 1) && (iter->subspace_iter == NULL) && (iter->numiter == 1)) { res = trivial_at_loop(ufuncimpl, flags, iter, op1_array, op2_array, &context); } else { /* Couldn't use the fastest path, use the faster path */ res = ufunc_at__fast_iter(ufunc, flags, iter, iter2, op1_array, op2_array, strided_loop, &context, auxdata); } } else { res = ufunc_at__slow_iter(ufunc, flags, iter, iter2, op1_array, op2_array, operation_descrs, strided_loop, &context, auxdata); } fail: NPY_AUXDATA_FREE(auxdata); Py_XDECREF(op2_array); Py_XDECREF(iter); Py_XDECREF(iter2); for (int i = 0; i < nop; i++) { Py_XDECREF(operation_descrs[i]); } /* * An error should only be possible if needs_api is true or `res != 0`, * but this is not strictly correct for old-style ufuncs * (e.g. `power` released the GIL but manually set an Exception). */ if (res != 0 || PyErr_Occurred()) { /* iter_buffer has already been deallocated, don't use NpyIter_Dealloc */ if (PyArray_FLAGS(op1_array) & NPY_ARRAY_WRITEBACKIFCOPY) { PyArray_DiscardWritebackIfCopy(op1_array); } return NULL; } else { Py_RETURN_NONE; } } typedef struct { PyArrayMethod_StridedLoop *strided_loop; PyArrayMethod_Context *context; NpyAuxData *auxdata; /* Should move to flags, but lets keep it bools for now: */ npy_bool requires_pyapi; npy_bool no_floatingpoint_errors; PyArrayMethod_Context _full_context; PyArray_Descr *_descrs[]; } ufunc_call_info; void free_ufunc_call_info(PyObject *self) { ufunc_call_info *call_info = PyCapsule_GetPointer( self, "numpy_1.24_ufunc_call_info"); PyArrayMethod_Context *context = call_info->context; int nargs = context->method->nin + context->method->nout; for (int i = 0; i < nargs; i++) { Py_DECREF(context->descriptors[i]); } Py_DECREF(context->caller); Py_DECREF(context->method); NPY_AUXDATA_FREE(call_info->auxdata); PyObject_Free(call_info); } /* * Python entry-point to ufunc promotion and dtype/descr resolution. * * This function does most of the work required to execute ufunc without * actually executing it. 
* This can be very useful for downstream libraries that reimplement NumPy * functionality, such as Numba or Dask. */ static PyObject * py_resolve_dtypes_generic(PyUFuncObject *ufunc, npy_bool return_context, PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) { NPY_PREPARE_ARGPARSER; PyObject *descrs_tuple; PyObject *signature_obj = NULL; NPY_CASTING casting = NPY_DEFAULT_ASSIGN_CASTING; npy_bool reduction = NPY_FALSE; if (npy_parse_arguments("resolve_dtypes", args, len_args, kwnames, "", NULL, &descrs_tuple, "$signature", NULL, &signature_obj, "$casting", &PyArray_CastingConverter, &casting, "$reduction", &PyArray_BoolConverter, &reduction, NULL, NULL, NULL) < 0) { return NULL; } if (reduction && (ufunc->nin != 2 || ufunc->nout != 1)) { PyErr_SetString(PyExc_ValueError, "ufunc is not compatible with reduction operations."); return NULL; } /* * Legacy type resolvers expect NumPy arrays as input. Until NEP 50 is * adopted, it is most convenient to ensure that we have an "array" object * before calling the type promotion. Eventually, this hack may be moved * into the legacy type resolution code itself (probably after NumPy stops * using legacy type resolution itself for the most part). * * We make the pretty safe assumptions here that: * - Nobody will actually do anything with the array objects besides * checking the descriptor or calling CanCast. * - No type resolver will cause weird paths that mess with our promotion * state (or mind us messing with it). */ PyObject *result = NULL; PyObject *result_dtype_tuple = NULL; PyArrayObject *dummy_arrays[NPY_MAXARGS] = {NULL}; PyArray_DTypeMeta *DTypes[NPY_MAXARGS] = {NULL}; PyArray_DTypeMeta *signature[NPY_MAXARGS] = {NULL}; PyArray_Descr *operation_descrs[NPY_MAXARGS] = {NULL}; /* This entry-point to promotion lives in the NEP 50 future: */ int original_promotion_state = npy_promotion_state; npy_promotion_state = NPY_USE_WEAK_PROMOTION; npy_bool promoting_pyscalars = NPY_FALSE; npy_bool allow_legacy_promotion = NPY_TRUE; if (_get_fixed_signature(ufunc, NULL, signature_obj, signature) < 0) { goto finish; } if (!PyTuple_CheckExact(descrs_tuple) || PyTuple_Size(descrs_tuple) != ufunc->nargs) { PyErr_SetString(PyExc_TypeError, "resolve_dtypes: The dtypes must be a tuple of " "`ufunc.nargs` length."); goto finish; } for (int i=0; i < ufunc->nargs; i++) { /* * We create dummy arrays for now. It should be OK to make this * truly "dummy" (not even proper objects), but that is a hack better * left for the legacy_type_resolution wrapper when NEP 50 is done. */ PyObject *descr_obj = PyTuple_GET_ITEM(descrs_tuple, i); PyArray_Descr *descr; if (PyArray_DescrCheck(descr_obj)) { descr = (PyArray_Descr *)descr_obj; Py_INCREF(descr); dummy_arrays[i] = (PyArrayObject *)PyArray_NewFromDescr_int( &PyArray_Type, descr, 0, NULL, NULL, NULL, 0, NULL, NULL, _NPY_ARRAY_ENSURE_DTYPE_IDENTITY); if (dummy_arrays[i] == NULL) { goto finish; } DTypes[i] = NPY_DTYPE(descr); Py_INCREF(DTypes[i]); if (!NPY_DT_is_legacy(DTypes[i])) { allow_legacy_promotion = NPY_FALSE; } } /* Explicitly allow int, float, and complex for the "weak" types. 
*/ else if (descr_obj == (PyObject *)&PyLong_Type) { descr = PyArray_DescrFromType(NPY_LONG); dummy_arrays[i] = (PyArrayObject *)PyArray_Empty(0, NULL, descr, 0); if (dummy_arrays[i] == NULL) { goto finish; } PyArray_ENABLEFLAGS(dummy_arrays[i], NPY_ARRAY_WAS_PYTHON_INT); Py_INCREF(&PyArray_PyIntAbstractDType); DTypes[i] = &PyArray_PyIntAbstractDType; promoting_pyscalars = NPY_TRUE; } else if (descr_obj == (PyObject *)&PyFloat_Type) { descr = PyArray_DescrFromType(NPY_DOUBLE); dummy_arrays[i] = (PyArrayObject *)PyArray_Empty(0, NULL, descr, 0); if (dummy_arrays[i] == NULL) { goto finish; } PyArray_ENABLEFLAGS(dummy_arrays[i], NPY_ARRAY_WAS_PYTHON_FLOAT); Py_INCREF(&PyArray_PyFloatAbstractDType); DTypes[i] = &PyArray_PyFloatAbstractDType; promoting_pyscalars = NPY_TRUE; } else if (descr_obj == (PyObject *)&PyComplex_Type) { descr = PyArray_DescrFromType(NPY_CDOUBLE); dummy_arrays[i] = (PyArrayObject *)PyArray_Empty(0, NULL, descr, 0); if (dummy_arrays[i] == NULL) { goto finish; } PyArray_ENABLEFLAGS(dummy_arrays[i], NPY_ARRAY_WAS_PYTHON_COMPLEX); Py_INCREF(&PyArray_PyComplexAbstractDType); DTypes[i] = &PyArray_PyComplexAbstractDType; promoting_pyscalars = NPY_TRUE; } else if (descr_obj == Py_None) { if (i < ufunc->nin && !(reduction && i == 0)) { PyErr_SetString(PyExc_TypeError, "All input dtypes must be provided " "(except the first one in reductions)"); goto finish; } } else { PyErr_SetString(PyExc_TypeError, "Provided dtype must be a valid NumPy dtype, " "int, float, complex, or None."); goto finish; } } PyArrayMethodObject *ufuncimpl; if (!reduction) { ufuncimpl = promote_and_get_ufuncimpl(ufunc, dummy_arrays, signature, DTypes, NPY_FALSE, allow_legacy_promotion, promoting_pyscalars, NPY_FALSE); if (ufuncimpl == NULL) { goto finish; } /* Find the correct descriptors for the operation */ if (resolve_descriptors(ufunc->nargs, ufunc, ufuncimpl, dummy_arrays, operation_descrs, signature, casting) < 0) { goto finish; } if (validate_casting( ufuncimpl, ufunc, dummy_arrays, operation_descrs, casting) < 0) { goto finish; } } else { /* reduction */ if (signature[2] != NULL) { PyErr_SetString(PyExc_ValueError, "Reduction signature must end with None, instead pass " "the first DType in the signature."); goto finish; } if (dummy_arrays[2] != NULL) { PyErr_SetString(PyExc_TypeError, "Output dtype must not be passed for reductions, " "pass the first input instead."); goto finish; } ufuncimpl = reducelike_promote_and_resolve(ufunc, dummy_arrays[1], dummy_arrays[0], signature, NPY_FALSE, operation_descrs, casting, "resolve_dtypes"); if (ufuncimpl == NULL) { goto finish; } } result = PyArray_TupleFromItems( ufunc->nargs, (PyObject **)operation_descrs, 0); if (result == NULL || !return_context) { goto finish; } /* Result will be (dtype_tuple, call_info), so move it and clear result */ result_dtype_tuple = result; result = NULL; /* We may have to return the context: */ ufunc_call_info *call_info; call_info = PyObject_Malloc(sizeof(ufunc_call_info) + ufunc->nargs * sizeof(PyArray_Descr *)); if (call_info == NULL) { PyErr_NoMemory(); goto finish; } call_info->strided_loop = NULL; call_info->auxdata = NULL; call_info->context = &call_info->_full_context; /* * We create a capsule with NumPy 1.24 in the name to signal that it is * prone to change in version updates (it doesn't have to). * This capsule is documented in the `ufunc._resolve_dtypes_and_context` * docstring. 
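     * (Illustrative Python-side flow: `ufunc._resolve_dtypes_and_context(...)`
     * returns `(dtypes, capsule)`, and that capsule is what
     * `_get_strided_loop` below consumes.)
     */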
static PyObject *
py_resolve_dtypes(PyUFuncObject *ufunc,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
    return py_resolve_dtypes_generic(ufunc, NPY_FALSE,
            args, len_args, kwnames);
}

static PyObject *
py_resolve_dtypes_and_context(PyUFuncObject *ufunc,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
    return py_resolve_dtypes_generic(ufunc, NPY_TRUE,
            args, len_args, kwnames);
}

static PyObject *
py_get_strided_loop(PyUFuncObject *ufunc,
        PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
{
    NPY_PREPARE_ARGPARSER;

    PyObject *call_info_obj;
    PyObject *fixed_strides_obj = Py_None;
    npy_intp fixed_strides[NPY_MAXARGS];

    if (npy_parse_arguments("_get_strided_loop", args, len_args, kwnames,
            "", NULL, &call_info_obj,
            "$fixed_strides", NULL, &fixed_strides_obj,
            NULL, NULL, NULL) < 0) {
        return NULL;
    }

    ufunc_call_info *call_info = PyCapsule_GetPointer(
            call_info_obj, "numpy_1.24_ufunc_call_info");
    if (call_info == NULL) {
        /* Cannot have a context with NULL inside... */
        assert(PyErr_Occurred());
        return NULL;
    }
    if (call_info->strided_loop != NULL) {
        PyErr_SetString(PyExc_TypeError,
                "ufunc call_info has already been filled/used!");
        return NULL;
    }

    if (call_info->context->caller != (PyObject *)ufunc) {
        PyErr_SetString(PyExc_TypeError,
                "calling get_strided_loop with incompatible context");
        return NULL;
    }

    /*
     * Strictly convert `fixed_strides`: it must be None or a tuple of
     * length `ufunc.nargs` containing ints and Nones.
     */
    if (fixed_strides_obj == Py_None) {
        for (int i = 0; i < ufunc->nargs; i++) {
            fixed_strides[i] = NPY_MAX_INTP;
        }
    }
    else if (PyTuple_CheckExact(fixed_strides_obj)
            && PyTuple_Size(fixed_strides_obj) == ufunc->nargs) {
        for (int i = 0; i < ufunc->nargs; i++) {
            PyObject *stride = PyTuple_GET_ITEM(fixed_strides_obj, i);
            if (PyLong_CheckExact(stride)) {
                fixed_strides[i] = PyLong_AsSsize_t(stride);
                if (error_converting(fixed_strides[i])) {
                    return NULL;
                }
            }
            else if (stride == Py_None) {
                fixed_strides[i] = NPY_MAX_INTP;
            }
            else {
                PyErr_SetString(PyExc_TypeError,
                        "_get_strided_loop(): fixed_strides tuple must "
                        "contain Python ints or None");
                return NULL;
            }
        }
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                "_get_strided_loop(): fixed_strides must be a tuple or None");
        return NULL;
    }

    NPY_ARRAYMETHOD_FLAGS flags;
    if (call_info->context->method->get_strided_loop(call_info->context,
            1, 0, fixed_strides, &call_info->strided_loop,
            &call_info->auxdata, &flags) < 0) {
        return NULL;
    }

    call_info->requires_pyapi = flags & NPY_METH_REQUIRES_PYAPI;
    call_info->no_floatingpoint_errors = (
            flags & NPY_METH_NO_FLOATINGPOINT_ERRORS);

    Py_RETURN_NONE;
}
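/*
 * Minimal, hedged sketch (intentionally disabled) of how a C consumer might
 * invoke the inner loop once the capsule has been filled by
 * `_get_strided_loop`.  The helper name `example_call_inner_loop` is
 * hypothetical; the exact strided-loop signature is an assumption based on
 * `PyArrayMethod_StridedLoop`, and GIL handling, floating-point error
 * checks, and cleanup are all elided.
 */
#if 0
static int
example_call_inner_loop(PyObject *capsule,
        char *data[], npy_intp const N, npy_intp const strides[])
{
    ufunc_call_info *call_info = PyCapsule_GetPointer(
            capsule, "numpy_1.24_ufunc_call_info");
    if (call_info == NULL || call_info->strided_loop == NULL) {
        return -1;
    }
    /* One chunk of work: N elements with the given per-operand strides. */
    return call_info->strided_loop(
            call_info->context, data, &N, strides, call_info->auxdata);
}
#endif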
static struct PyMethodDef ufunc_methods[] = {
    {"reduce",
        (PyCFunction)ufunc_reduce,
        METH_FASTCALL | METH_KEYWORDS, NULL},
    {"accumulate",
        (PyCFunction)ufunc_accumulate,
        METH_FASTCALL | METH_KEYWORDS, NULL},
    {"reduceat",
        (PyCFunction)ufunc_reduceat,
        METH_FASTCALL | METH_KEYWORDS, NULL},
    {"outer",
        (PyCFunction)ufunc_outer,
        METH_FASTCALL | METH_KEYWORDS, NULL},
    {"at",
        (PyCFunction)ufunc_at,
        METH_VARARGS, NULL},
    /* Lower level methods: */
    {"resolve_dtypes",
        (PyCFunction)py_resolve_dtypes,
        METH_FASTCALL | METH_KEYWORDS, NULL},
    /*
     * The following two functions are public API, but underscored since they
     * are C-user specific and allow direct access to the core of ufunc loops.
     * (See their documentation for API stability.)
     */
    {"_resolve_dtypes_and_context",
        (PyCFunction)py_resolve_dtypes_and_context,
        METH_FASTCALL | METH_KEYWORDS, NULL},
    {"_get_strided_loop",
        (PyCFunction)py_get_strided_loop,
        METH_FASTCALL | METH_KEYWORDS, NULL},
    {NULL, NULL, 0, NULL}           /* sentinel */
};


/******************************************************************************
 ***                           UFUNC GETSET                                 ***
 *****************************************************************************/


static char
_typecharfromnum(int num)
{
    PyArray_Descr *descr;
    char ret;

    descr = PyArray_DescrFromType(num);
    ret = descr->type;
    Py_DECREF(descr);
    return ret;
}


static PyObject *
ufunc_get_doc(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    static PyObject *_sig_formatter;
    PyObject *doc;

    npy_cache_import(
        "numpy.core._internal",
        "_ufunc_doc_signature_formatter",
        &_sig_formatter);

    if (_sig_formatter == NULL) {
        return NULL;
    }

    /*
     * Put docstring first or FindMethod finds it... could do some
     * introspection on name and nin + nout to automate the first part
     * of it; the doc string shouldn't need the calling convention.
     */
    doc = PyObject_CallFunctionObjArgs(_sig_formatter,
                                       (PyObject *)ufunc, NULL);
    if (doc == NULL) {
        return NULL;
    }
    if (ufunc->doc != NULL) {
        Py_SETREF(doc, PyUnicode_FromFormat("%S\n\n%s", doc, ufunc->doc));
    }
    return doc;
}

static PyObject *
ufunc_get_nin(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    return PyLong_FromLong(ufunc->nin);
}

static PyObject *
ufunc_get_nout(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    return PyLong_FromLong(ufunc->nout);
}

static PyObject *
ufunc_get_nargs(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    return PyLong_FromLong(ufunc->nargs);
}

static PyObject *
ufunc_get_ntypes(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    return PyLong_FromLong(ufunc->ntypes);
}
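/*
 * Illustrative example of the formatting performed below (hedged; the
 * actual entries depend on the registered loops and the platform).  For
 * `np.add`, the `types` attribute yields strings such as
 *
 *     ['??->?', 'bb->b', ..., 'dd->d', ..., 'OO->O']
 *
 * i.e. one type character per input, "->", then one character per output.
 */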
static PyObject *
ufunc_get_types(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    /* return a list with types grouped input->output */
    PyObject *list;
    PyObject *str;
    int k, j, n, nt = ufunc->ntypes;
    int ni = ufunc->nin;
    int no = ufunc->nout;
    char *t;

    list = PyList_New(nt);
    if (list == NULL) {
        return NULL;
    }
    t = PyArray_malloc(no + ni + 2);
    if (t == NULL) {
        Py_DECREF(list);
        return PyErr_NoMemory();
    }
    n = 0;
    for (k = 0; k < nt; k++) {
        for (j = 0; j < ni; j++) {
            t[j] = _typecharfromnum(ufunc->types[n]);
            n++;
        }
        t[ni] = '-';
        t[ni + 1] = '>';
        for (j = 0; j < no; j++) {
            t[ni + 2 + j] = _typecharfromnum(ufunc->types[n]);
            n++;
        }
        str = PyUnicode_FromStringAndSize(t, no + ni + 2);
        PyList_SET_ITEM(list, k, str);
    }
    PyArray_free(t);
    return list;
}

static PyObject *
ufunc_get_name(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    return PyUnicode_FromString(ufunc->name);
}

static PyObject *
ufunc_get_identity(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    npy_bool reorderable;
    return PyUFunc_GetDefaultIdentity(ufunc, &reorderable);
}

static PyObject *
ufunc_get_signature(PyUFuncObject *ufunc, void *NPY_UNUSED(ignored))
{
    if (!ufunc->core_enabled) {
        Py_RETURN_NONE;
    }
    return PyUnicode_FromString(ufunc->core_signature);
}

#undef _typecharfromnum

/*
 * Docstring is now set from python
 * static char *Ufunctype__doc__ = NULL;
 */
static PyGetSetDef ufunc_getset[] = {
    {"__doc__",
        (getter)ufunc_get_doc,
        NULL, NULL, NULL},
    {"nin",
        (getter)ufunc_get_nin,
        NULL, NULL, NULL},
    {"nout",
        (getter)ufunc_get_nout,
        NULL, NULL, NULL},
    {"nargs",
        (getter)ufunc_get_nargs,
        NULL, NULL, NULL},
    {"ntypes",
        (getter)ufunc_get_ntypes,
        NULL, NULL, NULL},
    {"types",
        (getter)ufunc_get_types,
        NULL, NULL, NULL},
    {"__name__",
        (getter)ufunc_get_name,
        NULL, NULL, NULL},
    {"identity",
        (getter)ufunc_get_identity,
        NULL, NULL, NULL},
    {"signature",
        (getter)ufunc_get_signature,
        NULL, NULL, NULL},
    {NULL, NULL, NULL, NULL, NULL},  /* Sentinel */
};


/******************************************************************************
 ***                        UFUNC TYPE OBJECT                               ***
 *****************************************************************************/

NPY_NO_EXPORT PyTypeObject PyUFunc_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "numpy.ufunc",
    .tp_basicsize = sizeof(PyUFuncObject),
    .tp_dealloc = (destructor)ufunc_dealloc,
    .tp_vectorcall_offset = offsetof(PyUFuncObject, vectorcall),
    .tp_repr = (reprfunc)ufunc_repr,
    .tp_call = &PyVectorcall_Call,
    .tp_str = (reprfunc)ufunc_repr,
    .tp_flags = Py_TPFLAGS_DEFAULT |
        _Py_TPFLAGS_HAVE_VECTORCALL |
        Py_TPFLAGS_HAVE_GC,
    .tp_traverse = (traverseproc)ufunc_traverse,
    .tp_methods = ufunc_methods,
    .tp_getset = ufunc_getset,
};

/* End of code for ufunc objects */
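/*
 * Closing note: a hedged, illustrative Python-level view of the getters and
 * type defined above (example values, not doctests):
 *
 *     >>> np.add.nin, np.add.nout, np.add.nargs
 *     (2, 1, 3)
 *     >>> np.add.identity
 *     0
 *     >>> np.matmul.signature
 *     '(n?,k),(k,m?)->(n?,m?)'
 *
 * Calls such as `np.add(a, b)` do not go through `tp_call` directly: the
 * `tp_vectorcall_offset` / `PyVectorcall_Call` setup above routes them
 * through the vectorcall slot stored on each ufunc instance.
 */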