author    | mattip <matti.picus@gmail.com> | 2018-06-25 12:35:18 -0700
committer | mattip <matti.picus@gmail.com> | 2018-08-21 20:06:17 +0300
commit    | 1bca48bb13dff1324078ca7dd968623c2a7f923f (patch)
tree      | 86bd3d7ffbd8a0bb68bfbb91fa2f6a06ccb6528c /numpy/core/src/common
parent    | bd4500fa91456230ac9d74c704956788fe3e6ac7 (diff)
download  | numpy-1bca48bb13dff1324078ca7dd968623c2a7f923f.tar.gz
MAINT: move unchanging, non-api files to common
Diffstat (limited to 'numpy/core/src/common')
-rw-r--r-- | numpy/core/src/common/array_assign.c  | 118
-rw-r--r-- | numpy/core/src/common/array_assign.h  | 100
-rw-r--r-- | numpy/core/src/common/cblasfuncs.c    | 693
-rw-r--r-- | numpy/core/src/common/cblasfuncs.h    |   7
-rw-r--r-- | numpy/core/src/common/python_xerbla.c |  51
-rw-r--r-- | numpy/core/src/common/ucsnarrow.c     | 174
-rw-r--r-- | numpy/core/src/common/ucsnarrow.h     |  13
7 files changed, 1156 insertions, 0 deletions
diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c new file mode 100644 index 000000000..a48e245d8 --- /dev/null +++ b/numpy/core/src/common/array_assign.c @@ -0,0 +1,118 @@ +/* + * This file implements some helper functions for the array assignment + * routines. The actual assignment routines are in array_assign_*.c + * + * Written by Mark Wiebe (mwwiebe@gmail.com) + * Copyright (c) 2011 by Enthought, Inc. + * + * See LICENSE.txt for the license. + */ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include <numpy/ndarraytypes.h> + +#include "npy_config.h" +#include "npy_pycompat.h" + +#include "shape.h" + +#include "array_assign.h" +#include "common.h" +#include "lowlevel_strided_loops.h" +#include "mem_overlap.h" + +/* See array_assign.h for parameter documentation */ +NPY_NO_EXPORT int +broadcast_strides(int ndim, npy_intp *shape, + int strides_ndim, npy_intp *strides_shape, npy_intp *strides, + char *strides_name, + npy_intp *out_strides) +{ + int idim, idim_start = ndim - strides_ndim; + + /* Can't broadcast to fewer dimensions */ + if (idim_start < 0) { + goto broadcast_error; + } + + /* + * Process from the end to the start, so that 'strides' and 'out_strides' + * can point to the same memory. + */ + for (idim = ndim - 1; idim >= idim_start; --idim) { + npy_intp strides_shape_value = strides_shape[idim - idim_start]; + /* If it doesn't have dimension one, it must match */ + if (strides_shape_value == 1) { + out_strides[idim] = 0; + } + else if (strides_shape_value != shape[idim]) { + goto broadcast_error; + } + else { + out_strides[idim] = strides[idim - idim_start]; + } + } + + /* New dimensions get a zero stride */ + for (idim = 0; idim < idim_start; ++idim) { + out_strides[idim] = 0; + } + + return 0; + +broadcast_error: { + PyObject *errmsg; + + errmsg = PyUString_FromFormat("could not broadcast %s from shape ", + strides_name); + PyUString_ConcatAndDel(&errmsg, + build_shape_string(strides_ndim, strides_shape)); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromString(" into shape ")); + PyUString_ConcatAndDel(&errmsg, + build_shape_string(ndim, shape)); + PyErr_SetObject(PyExc_ValueError, errmsg); + Py_DECREF(errmsg); + + return -1; + } +} + +/* See array_assign.h for parameter documentation */ +NPY_NO_EXPORT int +raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment) +{ + if (alignment > 1) { + npy_intp align_check = (npy_intp)data; + int idim; + + for (idim = 0; idim < ndim; ++idim) { + align_check |= strides[idim]; + } + + return npy_is_aligned((void *)align_check, alignment); + } + else { + return 1; + } +} + + +/* Returns 1 if the arrays have overlapping data, 0 otherwise */ +NPY_NO_EXPORT int +arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) +{ + mem_overlap_t result; + + result = solve_may_share_memory(arr1, arr2, NPY_MAY_SHARE_BOUNDS); + if (result == MEM_OVERLAP_NO) { + return 0; + } + else { + return 1; + } +} diff --git a/numpy/core/src/common/array_assign.h b/numpy/core/src/common/array_assign.h new file mode 100644 index 000000000..3fecff007 --- /dev/null +++ b/numpy/core/src/common/array_assign.h @@ -0,0 +1,100 @@ +#ifndef _NPY_PRIVATE__ARRAY_ASSIGN_H_ +#define _NPY_PRIVATE__ARRAY_ASSIGN_H_ + +/* + * An array assignment function for copying arrays, treating the + * arrays as flat according to their respective ordering rules. 
+ * This function makes a temporary copy of 'src' if 'src' and + * 'dst' overlap, to be able to handle views of the same data with + * different strides. + * + * dst: The destination array. + * dst_order: The rule for how 'dst' is to be made flat. + * src: The source array. + * src_order: The rule for how 'src' is to be made flat. + * casting: An exception is raised if the copy violates this + * casting rule. + * + * Returns 0 on success, -1 on failure. + */ +/* Not yet implemented +NPY_NO_EXPORT int +PyArray_AssignArrayAsFlat(PyArrayObject *dst, NPY_ORDER dst_order, + PyArrayObject *src, NPY_ORDER src_order, + NPY_CASTING casting, + npy_bool preservena, npy_bool *preservewhichna); +*/ + +NPY_NO_EXPORT int +PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src, + PyArrayObject *wheremask, + NPY_CASTING casting); + +NPY_NO_EXPORT int +PyArray_AssignRawScalar(PyArrayObject *dst, + PyArray_Descr *src_dtype, char *src_data, + PyArrayObject *wheremask, + NPY_CASTING casting); + +/******** LOW-LEVEL SCALAR TO ARRAY ASSIGNMENT ********/ + +/* + * Assigns the scalar value to every element of the destination raw array. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +raw_array_assign_scalar(int ndim, npy_intp *shape, + PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides, + PyArray_Descr *src_dtype, char *src_data); + +/* + * Assigns the scalar value to every element of the destination raw array + * where the 'wheremask' value is True. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape, + PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides, + PyArray_Descr *src_dtype, char *src_data, + PyArray_Descr *wheremask_dtype, char *wheremask_data, + npy_intp *wheremask_strides); + +/******** LOW-LEVEL ARRAY MANIPULATION HELPERS ********/ + +/* + * Internal detail of how much to buffer during array assignments which + * need it. This is for more complex NA masking operations where masks + * need to be inverted or combined together. + */ +#define NPY_ARRAY_ASSIGN_BUFFERSIZE 8192 + +/* + * Broadcasts strides to match the given dimensions. Can be used, + * for instance, to set up a raw iteration. + * + * 'strides_name' is used to produce an error message if the strides + * cannot be broadcast. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +broadcast_strides(int ndim, npy_intp *shape, + int strides_ndim, npy_intp *strides_shape, npy_intp *strides, + char *strides_name, + npy_intp *out_strides); + +/* + * Checks whether a data pointer + set of strides refers to a raw + * array which is fully aligned data. 
+ */ +NPY_NO_EXPORT int +raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment); + +/* Returns 1 if the arrays have overlapping data, 0 otherwise */ +NPY_NO_EXPORT int +arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2); + + +#endif diff --git a/numpy/core/src/common/cblasfuncs.c b/numpy/core/src/common/cblasfuncs.c new file mode 100644 index 000000000..6460c5db1 --- /dev/null +++ b/numpy/core/src/common/cblasfuncs.c @@ -0,0 +1,693 @@ +/* + * This module provides a BLAS optimized matrix multiply, + * inner product and dot for numpy arrays + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#include <Python.h> +#include <assert.h> +#include <numpy/arrayobject.h> +#include "npy_cblas.h" +#include "arraytypes.h" +#include "common.h" + + +static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0}; +static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0}; + + +/* + * Helper: dispatch to appropriate cblas_?gemm for typenum. + */ +static void +gemm(int typenum, enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB, + int m, int n, int k, + PyArrayObject *A, int lda, PyArrayObject *B, int ldb, PyArrayObject *R) +{ + const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B); + void *Rdata = PyArray_DATA(R); + int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1; + + switch (typenum) { + case NPY_DOUBLE: + cblas_dgemm(order, transA, transB, m, n, k, 1., + Adata, lda, Bdata, ldb, 0., Rdata, ldc); + break; + case NPY_FLOAT: + cblas_sgemm(order, transA, transB, m, n, k, 1.f, + Adata, lda, Bdata, ldb, 0.f, Rdata, ldc); + break; + case NPY_CDOUBLE: + cblas_zgemm(order, transA, transB, m, n, k, oneD, + Adata, lda, Bdata, ldb, zeroD, Rdata, ldc); + break; + case NPY_CFLOAT: + cblas_cgemm(order, transA, transB, m, n, k, oneF, + Adata, lda, Bdata, ldb, zeroF, Rdata, ldc); + break; + } +} + + +/* + * Helper: dispatch to appropriate cblas_?gemv for typenum. + */ +static void +gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, + PyArrayObject *A, int lda, PyArrayObject *X, int incX, + PyArrayObject *R) +{ + const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X); + void *Rdata = PyArray_DATA(R); + + int m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1); + + switch (typenum) { + case NPY_DOUBLE: + cblas_dgemv(order, trans, m, n, 1., Adata, lda, Xdata, incX, + 0., Rdata, 1); + break; + case NPY_FLOAT: + cblas_sgemv(order, trans, m, n, 1.f, Adata, lda, Xdata, incX, + 0.f, Rdata, 1); + break; + case NPY_CDOUBLE: + cblas_zgemv(order, trans, m, n, oneD, Adata, lda, Xdata, incX, + zeroD, Rdata, 1); + break; + case NPY_CFLOAT: + cblas_cgemv(order, trans, m, n, oneF, Adata, lda, Xdata, incX, + zeroF, Rdata, 1); + break; + } +} + + +/* + * Helper: dispatch to appropriate cblas_?syrk for typenum. + */ +static void +syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, + int n, int k, + PyArrayObject *A, int lda, PyArrayObject *R) +{ + const void *Adata = PyArray_DATA(A); + void *Rdata = PyArray_DATA(R); + int ldc = PyArray_DIM(R, 1) > 1 ? 
PyArray_DIM(R, 1) : 1; + + npy_intp i; + npy_intp j; + + switch (typenum) { + case NPY_DOUBLE: + cblas_dsyrk(order, CblasUpper, trans, n, k, 1., + Adata, lda, 0., Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_double*)PyArray_GETPTR2(R, j, i)) = + *((npy_double*)PyArray_GETPTR2(R, i, j)); + } + } + break; + case NPY_FLOAT: + cblas_ssyrk(order, CblasUpper, trans, n, k, 1.f, + Adata, lda, 0.f, Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_float*)PyArray_GETPTR2(R, j, i)) = + *((npy_float*)PyArray_GETPTR2(R, i, j)); + } + } + break; + case NPY_CDOUBLE: + cblas_zsyrk(order, CblasUpper, trans, n, k, oneD, + Adata, lda, zeroD, Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_cdouble*)PyArray_GETPTR2(R, j, i)) = + *((npy_cdouble*)PyArray_GETPTR2(R, i, j)); + } + } + break; + case NPY_CFLOAT: + cblas_csyrk(order, CblasUpper, trans, n, k, oneF, + Adata, lda, zeroF, Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_cfloat*)PyArray_GETPTR2(R, j, i)) = + *((npy_cfloat*)PyArray_GETPTR2(R, i, j)); + } + } + break; + } +} + + +typedef enum {_scalar, _column, _row, _matrix} MatrixShape; + + +static MatrixShape +_select_matrix_shape(PyArrayObject *array) +{ + switch (PyArray_NDIM(array)) { + case 0: + return _scalar; + case 1: + if (PyArray_DIM(array, 0) > 1) + return _column; + return _scalar; + case 2: + if (PyArray_DIM(array, 0) > 1) { + if (PyArray_DIM(array, 1) == 1) + return _column; + else + return _matrix; + } + if (PyArray_DIM(array, 1) == 1) + return _scalar; + return _row; + } + return _matrix; +} + + +/* + * This also makes sure that the data segment is aligned with + * an itemsize address as well by returning one if not true. + */ +static int +_bad_strides(PyArrayObject *ap) +{ + int itemsize = PyArray_ITEMSIZE(ap); + int i, N=PyArray_NDIM(ap); + npy_intp *strides = PyArray_STRIDES(ap); + + if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) { + return 1; + } + for (i = 0; i < N; i++) { + if ((strides[i] < 0) || (strides[i] % itemsize) != 0) { + return 1; + } + } + + return 0; +} + + +/* + * dot(a,b) + * Returns the dot product of a and b for arrays of floating point types. + * Like the generic numpy equivalent the product sum is over + * the last dimension of a and the second-to-last dimension of b. + * NB: The first argument is not conjugated.; + * + * This is for use by PyArray_MatrixProduct2. It is assumed on entry that + * the arrays ap1 and ap2 have a common data type given by typenum that is + * float, double, cfloat, or cdouble and have dimension <= 2. The + * __array_ufunc__ nonsense is also assumed to have been taken care of. 
+ */ +NPY_NO_EXPORT PyObject * +cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2, + PyArrayObject *out) +{ + PyArrayObject *result = NULL, *out_buf = NULL; + int j, lda, ldb; + npy_intp l; + int nd; + npy_intp ap1stride = 0; + npy_intp dimensions[NPY_MAXDIMS]; + npy_intp numbytes; + MatrixShape ap1shape, ap2shape; + + if (_bad_strides(ap1)) { + PyObject *op1 = PyArray_NewCopy(ap1, NPY_ANYORDER); + + Py_DECREF(ap1); + ap1 = (PyArrayObject *)op1; + if (ap1 == NULL) { + goto fail; + } + } + if (_bad_strides(ap2)) { + PyObject *op2 = PyArray_NewCopy(ap2, NPY_ANYORDER); + + Py_DECREF(ap2); + ap2 = (PyArrayObject *)op2; + if (ap2 == NULL) { + goto fail; + } + } + ap1shape = _select_matrix_shape(ap1); + ap2shape = _select_matrix_shape(ap2); + + if (ap1shape == _scalar || ap2shape == _scalar) { + PyArrayObject *oap1, *oap2; + oap1 = ap1; oap2 = ap2; + /* One of ap1 or ap2 is a scalar */ + if (ap1shape == _scalar) { + /* Make ap2 the scalar */ + PyArrayObject *t = ap1; + ap1 = ap2; + ap2 = t; + ap1shape = ap2shape; + ap2shape = _scalar; + } + + if (ap1shape == _row) { + ap1stride = PyArray_STRIDE(ap1, 1); + } + else if (PyArray_NDIM(ap1) > 0) { + ap1stride = PyArray_STRIDE(ap1, 0); + } + + if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { + npy_intp *thisdims; + if (PyArray_NDIM(ap1) == 0) { + nd = PyArray_NDIM(ap2); + thisdims = PyArray_DIMS(ap2); + } + else { + nd = PyArray_NDIM(ap1); + thisdims = PyArray_DIMS(ap1); + } + l = 1; + for (j = 0; j < nd; j++) { + dimensions[j] = thisdims[j]; + l *= dimensions[j]; + } + } + else { + l = PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1); + + if (PyArray_DIM(oap2, 0) != l) { + dot_alignment_error(oap1, PyArray_NDIM(oap1) - 1, oap2, 0); + goto fail; + } + nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; + /* + * nd = 0 or 1 or 2. If nd == 0 do nothing ... + */ + if (nd == 1) { + /* + * Either PyArray_NDIM(ap1) is 1 dim or PyArray_NDIM(ap2) is + * 1 dim and the other is 2 dim + */ + dimensions[0] = (PyArray_NDIM(oap1) == 2) ? + PyArray_DIM(oap1, 0) : PyArray_DIM(oap2, 1); + l = dimensions[0]; + /* + * Fix it so that dot(shape=(N,1), shape=(1,)) + * and dot(shape=(1,), shape=(1,N)) both return + * an (N,) array (but use the fast scalar code) + */ + } + else if (nd == 2) { + dimensions[0] = PyArray_DIM(oap1, 0); + dimensions[1] = PyArray_DIM(oap2, 1); + /* + * We need to make sure that dot(shape=(1,1), shape=(1,N)) + * and dot(shape=(N,1),shape=(1,1)) uses + * scalar multiplication appropriately + */ + if (ap1shape == _row) { + l = dimensions[1]; + } + else { + l = dimensions[0]; + } + } + + /* Check if the summation dimension is 0-sized */ + if (PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1) == 0) { + l = 0; + } + } + } + else { + /* + * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2) + * Both ap1 and ap2 are vectors or matrices + */ + l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1); + + if (PyArray_DIM(ap2, 0) != l) { + dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, ap2, 0); + goto fail; + } + nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; + + if (nd == 1) { + dimensions[0] = (PyArray_NDIM(ap1) == 2) ? 
+ PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 1); + } + else if (nd == 2) { + dimensions[0] = PyArray_DIM(ap1, 0); + dimensions[1] = PyArray_DIM(ap2, 1); + } + } + + out_buf = new_array_for_sum(ap1, ap2, out, nd, dimensions, typenum, &result); + if (out_buf == NULL) { + goto fail; + } + + numbytes = PyArray_NBYTES(out_buf); + memset(PyArray_DATA(out_buf), 0, numbytes); + if (numbytes == 0 || l == 0) { + Py_DECREF(ap1); + Py_DECREF(ap2); + Py_DECREF(out_buf); + return PyArray_Return(result); + } + + if (ap2shape == _scalar) { + /* + * Multiplication by a scalar -- Level 1 BLAS + * if ap1shape is a matrix and we are not contiguous, then we can't + * just blast through the entire array using a single striding factor + */ + NPY_BEGIN_ALLOW_THREADS; + + if (typenum == NPY_DOUBLE) { + if (l == 1) { + *((double *)PyArray_DATA(out_buf)) = *((double *)PyArray_DATA(ap2)) * + *((double *)PyArray_DATA(ap1)); + } + else if (ap1shape != _matrix) { + cblas_daxpy(l, + *((double *)PyArray_DATA(ap2)), + (double *)PyArray_DATA(ap1), + ap1stride/sizeof(double), + (double *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + double val; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + val = *((double *)PyArray_DATA(ap2)); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(double); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(double); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_daxpy(l, val, (double *)ptr, a1s, + (double *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + else if (typenum == NPY_CDOUBLE) { + if (l == 1) { + npy_cdouble *ptr1, *ptr2, *res; + + ptr1 = (npy_cdouble *)PyArray_DATA(ap2); + ptr2 = (npy_cdouble *)PyArray_DATA(ap1); + res = (npy_cdouble *)PyArray_DATA(out_buf); + res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; + res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; + } + else if (ap1shape != _matrix) { + cblas_zaxpy(l, + (double *)PyArray_DATA(ap2), + (double *)PyArray_DATA(ap1), + ap1stride/sizeof(npy_cdouble), + (double *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + double *pval; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + pval = (double *)PyArray_DATA(ap2); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cdouble); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cdouble); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_zaxpy(l, pval, (double *)ptr, a1s, + (double *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + else if (typenum == NPY_FLOAT) { + if (l == 1) { + *((float *)PyArray_DATA(out_buf)) = *((float *)PyArray_DATA(ap2)) * + *((float *)PyArray_DATA(ap1)); + } + else if (ap1shape != _matrix) { + cblas_saxpy(l, + *((float *)PyArray_DATA(ap2)), + (float *)PyArray_DATA(ap1), + ap1stride/sizeof(float), + (float *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + float val; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 
0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + val = *((float *)PyArray_DATA(ap2)); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(float); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(float); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_saxpy(l, val, (float *)ptr, a1s, + (float *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + else if (typenum == NPY_CFLOAT) { + if (l == 1) { + npy_cfloat *ptr1, *ptr2, *res; + + ptr1 = (npy_cfloat *)PyArray_DATA(ap2); + ptr2 = (npy_cfloat *)PyArray_DATA(ap1); + res = (npy_cfloat *)PyArray_DATA(out_buf); + res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; + res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; + } + else if (ap1shape != _matrix) { + cblas_caxpy(l, + (float *)PyArray_DATA(ap2), + (float *)PyArray_DATA(ap1), + ap1stride/sizeof(npy_cfloat), + (float *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + float *pval; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + pval = (float *)PyArray_DATA(ap2); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cfloat); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cfloat); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_caxpy(l, pval, (float *)ptr, a1s, + (float *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + NPY_END_ALLOW_THREADS; + } + else if ((ap2shape == _column) && (ap1shape != _matrix)) { + NPY_BEGIN_ALLOW_THREADS; + + /* Dot product between two vectors -- Level 1 BLAS */ + PyArray_DESCR(out_buf)->f->dotfunc( + PyArray_DATA(ap1), PyArray_STRIDE(ap1, (ap1shape == _row)), + PyArray_DATA(ap2), PyArray_STRIDE(ap2, 0), + PyArray_DATA(out_buf), l, NULL); + NPY_END_ALLOW_THREADS; + } + else if (ap1shape == _matrix && ap2shape != _matrix) { + /* Matrix vector multiplication -- Level 2 BLAS */ + /* lda must be MAX(M,1) */ + enum CBLAS_ORDER Order; + int ap2s; + + if (!PyArray_ISONESEGMENT(ap1)) { + PyObject *new; + new = PyArray_Copy(ap1); + Py_DECREF(ap1); + ap1 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + NPY_BEGIN_ALLOW_THREADS + if (PyArray_ISCONTIGUOUS(ap1)) { + Order = CblasRowMajor; + lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1); + } + else { + Order = CblasColMajor; + lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1); + } + ap2s = PyArray_STRIDE(ap2, 0) / PyArray_ITEMSIZE(ap2); + gemv(typenum, Order, CblasNoTrans, ap1, lda, ap2, ap2s, out_buf); + NPY_END_ALLOW_THREADS; + } + else if (ap1shape != _matrix && ap2shape == _matrix) { + /* Vector matrix multiplication -- Level 2 BLAS */ + enum CBLAS_ORDER Order; + int ap1s; + + if (!PyArray_ISONESEGMENT(ap2)) { + PyObject *new; + new = PyArray_Copy(ap2); + Py_DECREF(ap2); + ap2 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + NPY_BEGIN_ALLOW_THREADS + if (PyArray_ISCONTIGUOUS(ap2)) { + Order = CblasRowMajor; + lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); + } + else { + Order = CblasColMajor; + lda = (PyArray_DIM(ap2, 0) > 1 ? 
PyArray_DIM(ap2, 0) : 1); + } + if (ap1shape == _row) { + ap1s = PyArray_STRIDE(ap1, 1) / PyArray_ITEMSIZE(ap1); + } + else { + ap1s = PyArray_STRIDE(ap1, 0) / PyArray_ITEMSIZE(ap1); + } + gemv(typenum, Order, CblasTrans, ap2, lda, ap1, ap1s, out_buf); + NPY_END_ALLOW_THREADS; + } + else { + /* + * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2) + * Matrix matrix multiplication -- Level 3 BLAS + * L x M multiplied by M x N + */ + enum CBLAS_ORDER Order; + enum CBLAS_TRANSPOSE Trans1, Trans2; + int M, N, L; + + /* Optimization possible: */ + /* + * We may be able to handle single-segment arrays here + * using appropriate values of Order, Trans1, and Trans2. + */ + if (!PyArray_IS_C_CONTIGUOUS(ap2) && !PyArray_IS_F_CONTIGUOUS(ap2)) { + PyObject *new = PyArray_Copy(ap2); + + Py_DECREF(ap2); + ap2 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + if (!PyArray_IS_C_CONTIGUOUS(ap1) && !PyArray_IS_F_CONTIGUOUS(ap1)) { + PyObject *new = PyArray_Copy(ap1); + + Py_DECREF(ap1); + ap1 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + + NPY_BEGIN_ALLOW_THREADS; + + Order = CblasRowMajor; + Trans1 = CblasNoTrans; + Trans2 = CblasNoTrans; + L = PyArray_DIM(ap1, 0); + N = PyArray_DIM(ap2, 1); + M = PyArray_DIM(ap2, 0); + lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1); + ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); + + /* + * Avoid temporary copies for arrays in Fortran order + */ + if (PyArray_IS_F_CONTIGUOUS(ap1)) { + Trans1 = CblasTrans; + lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1); + } + if (PyArray_IS_F_CONTIGUOUS(ap2)) { + Trans2 = CblasTrans; + ldb = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1); + } + + /* + * Use syrk if we have a case of a matrix times its transpose. + * Otherwise, use gemm for all other cases. + */ + if ( + (PyArray_BYTES(ap1) == PyArray_BYTES(ap2)) && + (PyArray_DIM(ap1, 0) == PyArray_DIM(ap2, 1)) && + (PyArray_DIM(ap1, 1) == PyArray_DIM(ap2, 0)) && + (PyArray_STRIDE(ap1, 0) == PyArray_STRIDE(ap2, 1)) && + (PyArray_STRIDE(ap1, 1) == PyArray_STRIDE(ap2, 0)) && + ((Trans1 == CblasTrans) ^ (Trans2 == CblasTrans)) && + ((Trans1 == CblasNoTrans) ^ (Trans2 == CblasNoTrans)) + ) { + if (Trans1 == CblasNoTrans) { + syrk(typenum, Order, Trans1, N, M, ap1, lda, out_buf); + } + else { + syrk(typenum, Order, Trans1, N, M, ap2, ldb, out_buf); + } + } + else { + gemm(typenum, Order, Trans1, Trans2, L, N, M, ap1, lda, ap2, ldb, + out_buf); + } + NPY_END_ALLOW_THREADS; + } + + + Py_DECREF(ap1); + Py_DECREF(ap2); + + /* Trigger possible copyback into `result` */ + PyArray_ResolveWritebackIfCopy(out_buf); + Py_DECREF(out_buf); + + return PyArray_Return(result); + +fail: + Py_XDECREF(ap1); + Py_XDECREF(ap2); + Py_XDECREF(out_buf); + Py_XDECREF(result); + return NULL; +} diff --git a/numpy/core/src/common/cblasfuncs.h b/numpy/core/src/common/cblasfuncs.h new file mode 100644 index 000000000..66ce4ca5b --- /dev/null +++ b/numpy/core/src/common/cblasfuncs.h @@ -0,0 +1,7 @@ +#ifndef _NPY_CBLASFUNCS_H_ +#define _NPY_CBLASFUNCS_H_ + +NPY_NO_EXPORT PyObject * +cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *); + +#endif diff --git a/numpy/core/src/common/python_xerbla.c b/numpy/core/src/common/python_xerbla.c new file mode 100644 index 000000000..bdf0b9058 --- /dev/null +++ b/numpy/core/src/common/python_xerbla.c @@ -0,0 +1,51 @@ +#include "Python.h" + +/* + * From f2c.h, this should be safe unless fortran is set to use 64 + * bit integers. We don't seem to have any good way to detect that. 
+ */ +typedef int integer; + +/* + From the original manpage: + -------------------------- + XERBLA is an error handler for the LAPACK routines. + It is called by an LAPACK routine if an input parameter has an invalid value. + A message is printed and execution stops. + + Instead of printing a message and stopping the execution, a + ValueError is raised with the message. + + Parameters: + ----------- + srname: Subroutine name to use in error message, maximum six characters. + Spaces at the end are skipped. + info: Number of the invalid parameter. +*/ + +int xerbla_(char *srname, integer *info) +{ + static const char format[] = "On entry to %.*s" \ + " parameter number %d had an illegal value"; + char buf[sizeof(format) + 6 + 4]; /* 6 for name, 4 for param. num. */ + + int len = 0; /* length of subroutine name*/ +#ifdef WITH_THREAD + PyGILState_STATE save; +#endif + + while( len<6 && srname[len]!='\0' ) + len++; + while( len && srname[len-1]==' ' ) + len--; +#ifdef WITH_THREAD + save = PyGILState_Ensure(); +#endif + PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info); + PyErr_SetString(PyExc_ValueError, buf); +#ifdef WITH_THREAD + PyGILState_Release(save); +#endif + + return 0; +} diff --git a/numpy/core/src/common/ucsnarrow.c b/numpy/core/src/common/ucsnarrow.c new file mode 100644 index 000000000..8e293e9f2 --- /dev/null +++ b/numpy/core/src/common/ucsnarrow.c @@ -0,0 +1,174 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#include <locale.h> +#include <stdio.h> + +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" +#include "numpy/npy_math.h" + +#include "npy_config.h" + +#include "npy_pycompat.h" +#include "ctors.h" + +/* + * Functions only needed on narrow builds of Python for converting back and + * forth between the NumPy Unicode data-type (always 4-bytes) and the + * Python Unicode scalar (2-bytes on a narrow build). + */ + +/* + * The ucs2 buffer must be large enough to hold 2*ucs4length characters + * due to the use of surrogate pairs. + * + * The return value is the number of ucs2 bytes used-up which + * is ucs4length + number of surrogate pairs found. + * + * Values above 0xffff are converted to surrogate pairs. + */ +NPY_NO_EXPORT int +PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length) +{ + int i; + int numucs2 = 0; + npy_ucs4 chr; + for (i = 0; i < ucs4length; i++) { + chr = *ucs4++; + if (chr > 0xffff) { + numucs2++; + chr -= 0x10000L; + *ucs2++ = 0xD800 + (Py_UNICODE) (chr >> 10); + *ucs2++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF); + } + else { + *ucs2++ = (Py_UNICODE) chr; + } + numucs2++; + } + return numucs2; +} + + +/* + * This converts a UCS2 buffer of the given length to UCS4 buffer. + * It converts up to ucs4len characters of UCS2 + * + * It returns the number of characters converted which can + * be less than ucs2len if there are surrogate pairs in ucs2. + * + * The return value is the actual size of the used part of the ucs4 buffer. 
+ */ +NPY_NO_EXPORT int +PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len) +{ + int i; + npy_ucs4 chr; + Py_UNICODE ch; + int numchars=0; + + for (i = 0; (i < ucs2len) && (numchars < ucs4len); i++) { + ch = *ucs2++; + if (ch >= 0xd800 && ch <= 0xdfff) { + /* surrogate pair */ + chr = ((npy_ucs4)(ch-0xd800)) << 10; + chr += *ucs2++ + 0x2400; /* -0xdc00 + 0x10000 */ + i++; + } + else { + chr = (npy_ucs4) ch; + } + *ucs4++ = chr; + numchars++; + } + return numchars; +} + +/* + * Returns a PyUnicodeObject initialized from a buffer containing + * UCS4 unicode. + * + * Parameters + * ---------- + * src: char * + * Pointer to buffer containing UCS4 unicode. + * size: Py_ssize_t + * Size of buffer in bytes. + * swap: int + * If true, the data will be swapped. + * align: int + * If true, the data will be aligned. + * + * Returns + * ------- + * new_reference: PyUnicodeObject + */ +NPY_NO_EXPORT PyUnicodeObject * +PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align) +{ + Py_ssize_t ucs4len = size / sizeof(npy_ucs4); + npy_ucs4 *buf = (npy_ucs4 *)src; + int alloc = 0; + PyUnicodeObject *ret; + + /* swap and align if needed */ + if (swap || align) { + buf = (npy_ucs4 *)malloc(size); + if (buf == NULL) { + PyErr_NoMemory(); + goto fail; + } + alloc = 1; + memcpy(buf, src, size); + if (swap) { + byte_swap_vector(buf, ucs4len, sizeof(npy_ucs4)); + } + } + + /* trim trailing zeros */ + while (ucs4len > 0 && buf[ucs4len - 1] == 0) { + ucs4len--; + } + + /* produce PyUnicode object */ +#ifdef Py_UNICODE_WIDE + { + ret = (PyUnicodeObject *)PyUnicode_FromUnicode((Py_UNICODE*)buf, + (Py_ssize_t) ucs4len); + if (ret == NULL) { + goto fail; + } + } +#else + { + Py_ssize_t tmpsiz = 2 * sizeof(Py_UNICODE) * ucs4len; + Py_ssize_t ucs2len; + Py_UNICODE *tmp; + + if ((tmp = (Py_UNICODE *)malloc(tmpsiz)) == NULL) { + PyErr_NoMemory(); + goto fail; + } + ucs2len = PyUCS2Buffer_FromUCS4(tmp, buf, ucs4len); + ret = (PyUnicodeObject *)PyUnicode_FromUnicode(tmp, (Py_ssize_t) ucs2len); + free(tmp); + if (ret == NULL) { + goto fail; + } + } +#endif + + if (alloc) { + free(buf); + } + return ret; + +fail: + if (alloc) { + free(buf); + } + return NULL; +} diff --git a/numpy/core/src/common/ucsnarrow.h b/numpy/core/src/common/ucsnarrow.h new file mode 100644 index 000000000..fe31a5e25 --- /dev/null +++ b/numpy/core/src/common/ucsnarrow.h @@ -0,0 +1,13 @@ +#ifndef _NPY_UCSNARROW_H_ +#define _NPY_UCSNARROW_H_ + +NPY_NO_EXPORT int +PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length); + +NPY_NO_EXPORT int +PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len); + +NPY_NO_EXPORT PyUnicodeObject * +PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align); + +#endif |
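The broadcast_strides helper in array_assign.c above encodes the broadcasting rule used to set up raw iteration: a size-1 source dimension repeats by getting stride 0, a matching dimension keeps its stride, and dimensions the source lacks entirely are prepended with stride 0. A minimal standalone sketch of the same rule, assuming plain long in place of npy_intp and a return code in place of NumPy's exception reporting:

#include <stdio.h>

/*
 * Illustrative re-statement of the broadcast_strides rule: walk the
 * dimensions from the end so that src_strides and out_strides may alias,
 * exactly as the real function's comment points out.
 * Returns 0 on success, -1 if the shapes cannot be broadcast.
 */
static int
broadcast_strides_sketch(int ndim, const long *shape,
                         int src_ndim, const long *src_shape,
                         const long *src_strides, long *out_strides)
{
    int idim, idim_start = ndim - src_ndim;

    if (idim_start < 0) {
        return -1;                  /* can't broadcast to fewer dimensions */
    }
    for (idim = ndim - 1; idim >= idim_start; --idim) {
        long src_dim = src_shape[idim - idim_start];
        if (src_dim == 1) {
            out_strides[idim] = 0;                   /* repeat the element */
        }
        else if (src_dim != shape[idim]) {
            return -1;                           /* incompatible dimension */
        }
        else {
            out_strides[idim] = src_strides[idim - idim_start];
        }
    }
    for (idim = 0; idim < idim_start; ++idim) {
        out_strides[idim] = 0;        /* prepended dimensions also repeat */
    }
    return 0;
}

int main(void)
{
    /* Broadcast a (3,1) array of doubles into a (2,3,4) iteration space. */
    long shape[3] = {2, 3, 4};
    long src_shape[2] = {3, 1};
    long src_strides[2] = {8, 8};       /* strides are in bytes */
    long out[3];

    if (broadcast_strides_sketch(3, shape, 2, src_shape,
                                 src_strides, out) == 0) {
        /* prints out_strides = {0, 8, 0} */
        printf("out_strides = {%ld, %ld, %ld}\n", out[0], out[1], out[2]);
    }
    return 0;
}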
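raw_array_is_aligned tests the base pointer and every stride in one shot by OR-ing them together before the alignment check: OR accumulates the low bits of all its inputs, and for a power-of-two alignment A a value is a multiple of A exactly when its bits below A are clear. A self-contained sketch of that invariant; the final test here inlines what the real code delegates to npy_is_aligned, and the power-of-two assumption is made explicit:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the OR trick: if any of data or the strides has a low bit
 * set, the OR of them all has it set too, so a single mask test covers
 * every input. Valid only for power-of-two alignments.
 */
static int
all_aligned(uintptr_t data, const long *strides, int ndim, unsigned alignment)
{
    uintptr_t check = data;
    int i;

    for (i = 0; i < ndim; i++) {
        check |= (uintptr_t)strides[i];
    }
    return (check & (uintptr_t)(alignment - 1)) == 0;
}

int main(void)
{
    long ok_strides[2] = {32, 8};
    long bad_strides[2] = {32, 4};      /* 4 is not a multiple of 8 */

    printf("%d\n", all_aligned(0x1000, ok_strides, 2, 8));   /* 1 */
    printf("%d\n", all_aligned(0x1000, bad_strides, 2, 8));  /* 0 */
    return 0;
}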
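In the Level-2 branches of cblas_matrixproduct, a one-segment but Fortran-ordered matrix is never copied; the code instead describes it to BLAS as column-major, so only the order argument and the leading dimension lda change, never the data. lda is clamped to at least 1, matching the source comment's "lda must be MAX(M,1)". A small sketch handing the same logical 2x3 matrix to cblas_dgemv both ways (the values are illustrative; link against any CBLAS implementation):

#include <stdio.h>
#include <cblas.h>

int main(void)
{
    /* The same logical 2x3 matrix stored two ways. */
    double c_order[6] = {1, 2, 3,
                         4, 5, 6};          /* rows contiguous */
    double f_order[6] = {1, 4, 2, 5, 3, 6}; /* columns contiguous */
    double x[3] = {1, 1, 1};
    double y1[2] = {0, 0}, y2[2] = {0, 0};

    /* C-contiguous: row-major, lda = row length (3). */
    cblas_dgemv(CblasRowMajor, CblasNoTrans, 2, 3, 1.0,
                c_order, 3, x, 1, 0.0, y1, 1);
    /* Fortran-contiguous: column-major, lda = column length (2). */
    cblas_dgemv(CblasColMajor, CblasNoTrans, 2, 3, 1.0,
                f_order, 2, x, 1, 0.0, y2, 1);

    /* Both print the row sums: 6 15 / 6 15 */
    printf("%g %g / %g %g\n", y1[0], y1[1], y2[0], y2[1]);
    return 0;
}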
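The matrix-matrix branch special-cases dot(A, A.T): when both operands share a data pointer, their shapes and strides are mirror images of each other, and exactly one of them ends up flagged as transposed, the second operand is a transpose view of the first, the product is symmetric, and ?syrk can fill one triangle instead of running the full ?gemm (the syrk helper above then mirrors the upper triangle into the lower). A plain-C sketch of just that detection predicate, with a hypothetical mat2d struct standing in for the PyArrayObject fields the real check reads:

#include <stdio.h>

/* Hypothetical stand-in for the fields the real condition reads off
 * PyArrayObject; the struct and its names are assumptions. */
typedef struct {
    char *data;
    long  dims[2];
    long  strides[2];
    int   is_trans;     /* stands in for the Trans1/Trans2 flags */
} mat2d;

/*
 * Mirrors the condition in cblas_matrixproduct: same buffer, dims and
 * strides swapped between the operands, exactly one seen as transposed,
 * i.e. b is a transpose view of a, so dot(a, b) == dot(a, a.T).
 */
static int
is_matrix_times_its_transpose(const mat2d *a, const mat2d *b)
{
    return a->data == b->data
        && a->dims[0] == b->dims[1]
        && a->dims[1] == b->dims[0]
        && a->strides[0] == b->strides[1]
        && a->strides[1] == b->strides[0]
        && (a->is_trans ^ b->is_trans);
}

int main(void)
{
    char buf[3 * 4 * 8];                    /* 3x4 of doubles */
    mat2d a  = {buf, {3, 4}, {32, 8}, 0};   /* C-ordered view */
    mat2d at = {buf, {4, 3}, {8, 32}, 1};   /* its transpose view */

    printf("%d\n", is_matrix_times_its_transpose(&a, &at)); /* 1 */
    return 0;
}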
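PyUCS2Buffer_FromUCS4 and PyUCS2Buffer_AsUCS4 implement standard UTF-16 surrogate-pair coding for narrow Python builds: a code point above U+FFFF has 0x10000 subtracted, the top ten remaining bits land in a high surrogate based at 0xD800, the bottom ten in a low surrogate based at 0xDC00, and the decoder's + 0x2400 is the folded constant -0xdc00 + 0x10000 called out in the source comment. A round-trip check of the same arithmetic, assuming fixed-width uint16_t/uint32_t in place of Py_UNICODE and npy_ucs4:

#include <stdint.h>
#include <stdio.h>

/* Encode one code point > 0xFFFF into a surrogate pair, as
 * PyUCS2Buffer_FromUCS4 does. */
static void
to_surrogates(uint32_t chr, uint16_t *hi, uint16_t *lo)
{
    chr -= 0x10000;
    *hi = 0xD800 + (uint16_t)(chr >> 10);    /* top 10 bits */
    *lo = 0xDC00 + (uint16_t)(chr & 0x3FF);  /* bottom 10 bits */
}

/* Decode a pair back, using the same folded constant
 * (+0x2400 == -0xDC00 + 0x10000) as PyUCS2Buffer_AsUCS4. */
static uint32_t
from_surrogates(uint16_t hi, uint16_t lo)
{
    uint32_t chr = ((uint32_t)(hi - 0xD800)) << 10;
    return chr + lo + 0x2400;
}

int main(void)
{
    uint16_t hi, lo;

    to_surrogates(0x1F600, &hi, &lo);   /* U+1F600, outside the BMP */
    printf("hi=%04X lo=%04X\n", (unsigned)hi, (unsigned)lo); /* D83D DE00 */
    printf("back=%X\n", (unsigned)from_surrogates(hi, lo));  /* 1F600 */
    return 0;
}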