author     mattip <matti.picus@gmail.com>  2018-06-25 12:35:18 -0700
committer  mattip <matti.picus@gmail.com>  2018-08-21 20:06:17 +0300
commit     1bca48bb13dff1324078ca7dd968623c2a7f923f (patch)
tree       86bd3d7ffbd8a0bb68bfbb91fa2f6a06ccb6528c /numpy/core/src/common
parent     bd4500fa91456230ac9d74c704956788fe3e6ac7 (diff)
download   numpy-1bca48bb13dff1324078ca7dd968623c2a7f923f.tar.gz
MAINT: move unchanging, non-api files to common
Diffstat (limited to 'numpy/core/src/common')
-rw-r--r--  numpy/core/src/common/array_assign.c    118
-rw-r--r--  numpy/core/src/common/array_assign.h    100
-rw-r--r--  numpy/core/src/common/cblasfuncs.c      693
-rw-r--r--  numpy/core/src/common/cblasfuncs.h        7
-rw-r--r--  numpy/core/src/common/python_xerbla.c    51
-rw-r--r--  numpy/core/src/common/ucsnarrow.c       174
-rw-r--r--  numpy/core/src/common/ucsnarrow.h        13
7 files changed, 1156 insertions, 0 deletions
diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c
new file mode 100644
index 000000000..a48e245d8
--- /dev/null
+++ b/numpy/core/src/common/array_assign.c
@@ -0,0 +1,118 @@
+/*
+ * This file implements some helper functions for the array assignment
+ * routines. The actual assignment routines are in array_assign_*.c
+ *
+ * Written by Mark Wiebe (mwwiebe@gmail.com)
+ * Copyright (c) 2011 by Enthought, Inc.
+ *
+ * See LICENSE.txt for the license.
+ */
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include <numpy/ndarraytypes.h>
+
+#include "npy_config.h"
+#include "npy_pycompat.h"
+
+#include "shape.h"
+
+#include "array_assign.h"
+#include "common.h"
+#include "lowlevel_strided_loops.h"
+#include "mem_overlap.h"
+
+/* See array_assign.h for parameter documentation */
+NPY_NO_EXPORT int
+broadcast_strides(int ndim, npy_intp *shape,
+ int strides_ndim, npy_intp *strides_shape, npy_intp *strides,
+ char *strides_name,
+ npy_intp *out_strides)
+{
+ int idim, idim_start = ndim - strides_ndim;
+
+ /* Can't broadcast to fewer dimensions */
+ if (idim_start < 0) {
+ goto broadcast_error;
+ }
+
+ /*
+ * Process from the end to the start, so that 'strides' and 'out_strides'
+ * can point to the same memory.
+ */
+ for (idim = ndim - 1; idim >= idim_start; --idim) {
+ npy_intp strides_shape_value = strides_shape[idim - idim_start];
+ /* A size-1 dimension broadcasts with stride zero; otherwise sizes must match */
+ if (strides_shape_value == 1) {
+ out_strides[idim] = 0;
+ }
+ else if (strides_shape_value != shape[idim]) {
+ goto broadcast_error;
+ }
+ else {
+ out_strides[idim] = strides[idim - idim_start];
+ }
+ }
+
+ /* New dimensions get a zero stride */
+ for (idim = 0; idim < idim_start; ++idim) {
+ out_strides[idim] = 0;
+ }
+
+ return 0;
+
+broadcast_error: {
+ PyObject *errmsg;
+
+ errmsg = PyUString_FromFormat("could not broadcast %s from shape ",
+ strides_name);
+ PyUString_ConcatAndDel(&errmsg,
+ build_shape_string(strides_ndim, strides_shape));
+ PyUString_ConcatAndDel(&errmsg,
+ PyUString_FromString(" into shape "));
+ PyUString_ConcatAndDel(&errmsg,
+ build_shape_string(ndim, shape));
+ PyErr_SetObject(PyExc_ValueError, errmsg);
+ Py_DECREF(errmsg);
+
+ return -1;
+ }
+}
+
+/* See array_assign.h for parameter documentation */
+NPY_NO_EXPORT int
+raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
+{
+ if (alignment > 1) {
+ npy_intp align_check = (npy_intp)data;
+ int idim;
+
+ for (idim = 0; idim < ndim; ++idim) {
+ align_check |= strides[idim];
+ }
+
+ return npy_is_aligned((void *)align_check, alignment);
+ }
+ else {
+ return 1;
+ }
+}
+
+
+/* Returns 1 if the arrays have overlapping data, 0 otherwise */
+NPY_NO_EXPORT int
+arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2)
+{
+ mem_overlap_t result;
+
+ result = solve_may_share_memory(arr1, arr2, NPY_MAY_SHARE_BOUNDS);
+ if (result == MEM_OVERLAP_NO) {
+ return 0;
+ }
+ else {
+ return 1;
+ }
+}
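The check in raw_array_is_aligned() above relies on a classic trick: OR the base
pointer together with every stride, then test the low bits of the result once.
A minimal standalone sketch of the same idea (function name hypothetical),
assuming a power-of-two alignment, which is how npy_is_aligned is typically
implemented:

    #include <stdint.h>

    /* Returns 1 if `data` and every stride are multiples of `alignment`
     * (a power of two). OR-ing the values merges their low bits, so a
     * single mask test covers the base pointer and all strides at once. */
    static int
    is_aligned_sketch(const void *data, const intptr_t *strides, int ndim,
                      uintptr_t alignment)
    {
        uintptr_t check = (uintptr_t)data;
        int i;

        for (i = 0; i < ndim; i++) {
            check |= (uintptr_t)strides[i];
        }
        return (check & (alignment - 1)) == 0;
    }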
diff --git a/numpy/core/src/common/array_assign.h b/numpy/core/src/common/array_assign.h
new file mode 100644
index 000000000..3fecff007
--- /dev/null
+++ b/numpy/core/src/common/array_assign.h
@@ -0,0 +1,100 @@
+#ifndef _NPY_PRIVATE__ARRAY_ASSIGN_H_
+#define _NPY_PRIVATE__ARRAY_ASSIGN_H_
+
+/*
+ * An array assignment function for copying arrays, treating the
+ * arrays as flat according to their respective ordering rules.
+ * This function makes a temporary copy of 'src' if 'src' and
+ * 'dst' overlap, to be able to handle views of the same data with
+ * different strides.
+ *
+ * dst: The destination array.
+ * dst_order: The rule for how 'dst' is to be made flat.
+ * src: The source array.
+ * src_order: The rule for how 'src' is to be made flat.
+ * casting: An exception is raised if the copy violates this
+ * casting rule.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+/* Not yet implemented
+NPY_NO_EXPORT int
+PyArray_AssignArrayAsFlat(PyArrayObject *dst, NPY_ORDER dst_order,
+ PyArrayObject *src, NPY_ORDER src_order,
+ NPY_CASTING casting,
+ npy_bool preservena, npy_bool *preservewhichna);
+*/
+
+NPY_NO_EXPORT int
+PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
+ PyArrayObject *wheremask,
+ NPY_CASTING casting);
+
+NPY_NO_EXPORT int
+PyArray_AssignRawScalar(PyArrayObject *dst,
+ PyArray_Descr *src_dtype, char *src_data,
+ PyArrayObject *wheremask,
+ NPY_CASTING casting);
+
+/******** LOW-LEVEL SCALAR TO ARRAY ASSIGNMENT ********/
+
+/*
+ * Assigns the scalar value to every element of the destination raw array.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+raw_array_assign_scalar(int ndim, npy_intp *shape,
+ PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides,
+ PyArray_Descr *src_dtype, char *src_data);
+
+/*
+ * Assigns the scalar value to every element of the destination raw array
+ * where the 'wheremask' value is True.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape,
+ PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides,
+ PyArray_Descr *src_dtype, char *src_data,
+ PyArray_Descr *wheremask_dtype, char *wheremask_data,
+ npy_intp *wheremask_strides);
+
+/******** LOW-LEVEL ARRAY MANIPULATION HELPERS ********/
+
+/*
+ * Internal detail of how much to buffer during array assignments which
+ * need it. This is for more complex NA masking operations where masks
+ * need to be inverted or combined together.
+ */
+#define NPY_ARRAY_ASSIGN_BUFFERSIZE 8192
+
+/*
+ * Broadcasts strides to match the given dimensions. Can be used,
+ * for instance, to set up a raw iteration.
+ *
+ * 'strides_name' is used to produce an error message if the strides
+ * cannot be broadcast.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+NPY_NO_EXPORT int
+broadcast_strides(int ndim, npy_intp *shape,
+ int strides_ndim, npy_intp *strides_shape, npy_intp *strides,
+ char *strides_name,
+ npy_intp *out_strides);
+
+/*
+ * Checks whether a data pointer + set of strides refers to a raw
+ * array whose data is fully aligned.
+ */
+NPY_NO_EXPORT int
+raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment);
+
+/* Returns 1 if the arrays have overlapping data, 0 otherwise */
+NPY_NO_EXPORT int
+arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2);
+
+
+#endif
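To make the broadcast_strides() contract concrete: broadcasting a (3,)-shaped
operand's strides to a target shape (2, 3) prepends a zero stride, so iterating
the new leading axis rereads the same row. A self-contained sketch of the same
rule (plain C, not the NumPy API; the shape-mismatch error path is omitted):

    #include <stdio.h>

    int main(void)
    {
        long shape[2] = {2, 3};      /* target shape */
        long src_shape[1] = {3};     /* operand shape */
        long src_strides[1] = {8};   /* e.g. contiguous doubles */
        long out[2];
        int ndim = 2, src_ndim = 1;
        int start = ndim - src_ndim; /* newly prepended dimensions */
        int i;

        /* walk from the end, as broadcast_strides() does */
        for (i = ndim - 1; i >= start; i--) {
            long s = src_shape[i - start];
            /* size-1 dims broadcast with stride 0; others must match */
            out[i] = (s == 1) ? 0 : src_strides[i - start];
        }
        for (i = 0; i < start; i++) {
            out[i] = 0;              /* new dimensions get stride 0 */
        }
        printf("out_strides = {%ld, %ld}\n", out[0], out[1]); /* {0, 8} */
        return 0;
    }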
diff --git a/numpy/core/src/common/cblasfuncs.c b/numpy/core/src/common/cblasfuncs.c
new file mode 100644
index 000000000..6460c5db1
--- /dev/null
+++ b/numpy/core/src/common/cblasfuncs.c
@@ -0,0 +1,693 @@
+/*
+ * This module provides a BLAS optimized matrix multiply,
+ * inner product and dot for numpy arrays
+ */
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
+#include <Python.h>
+#include <assert.h>
+#include <numpy/arrayobject.h>
+#include "npy_cblas.h"
+#include "arraytypes.h"
+#include "common.h"
+
+
+static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0};
+static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};
+
+
+/*
+ * Helper: dispatch to appropriate cblas_?gemm for typenum.
+ */
+static void
+gemm(int typenum, enum CBLAS_ORDER order,
+ enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB,
+ int m, int n, int k,
+ PyArrayObject *A, int lda, PyArrayObject *B, int ldb, PyArrayObject *R)
+{
+ const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B);
+ void *Rdata = PyArray_DATA(R);
+ int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
+
+ switch (typenum) {
+ case NPY_DOUBLE:
+ cblas_dgemm(order, transA, transB, m, n, k, 1.,
+ Adata, lda, Bdata, ldb, 0., Rdata, ldc);
+ break;
+ case NPY_FLOAT:
+ cblas_sgemm(order, transA, transB, m, n, k, 1.f,
+ Adata, lda, Bdata, ldb, 0.f, Rdata, ldc);
+ break;
+ case NPY_CDOUBLE:
+ cblas_zgemm(order, transA, transB, m, n, k, oneD,
+ Adata, lda, Bdata, ldb, zeroD, Rdata, ldc);
+ break;
+ case NPY_CFLOAT:
+ cblas_cgemm(order, transA, transB, m, n, k, oneF,
+ Adata, lda, Bdata, ldb, zeroF, Rdata, ldc);
+ break;
+ }
+}
+
+
+/*
+ * Helper: dispatch to appropriate cblas_?gemv for typenum.
+ */
+static void
+gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
+ PyArrayObject *A, int lda, PyArrayObject *X, int incX,
+ PyArrayObject *R)
+{
+ const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X);
+ void *Rdata = PyArray_DATA(R);
+
+ int m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1);
+
+ switch (typenum) {
+ case NPY_DOUBLE:
+ cblas_dgemv(order, trans, m, n, 1., Adata, lda, Xdata, incX,
+ 0., Rdata, 1);
+ break;
+ case NPY_FLOAT:
+ cblas_sgemv(order, trans, m, n, 1.f, Adata, lda, Xdata, incX,
+ 0.f, Rdata, 1);
+ break;
+ case NPY_CDOUBLE:
+ cblas_zgemv(order, trans, m, n, oneD, Adata, lda, Xdata, incX,
+ zeroD, Rdata, 1);
+ break;
+ case NPY_CFLOAT:
+ cblas_cgemv(order, trans, m, n, oneF, Adata, lda, Xdata, incX,
+ zeroF, Rdata, 1);
+ break;
+ }
+}
+
+
+/*
+ * Helper: dispatch to appropriate cblas_?syrk for typenum.
+ */
+static void
+syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
+ int n, int k,
+ PyArrayObject *A, int lda, PyArrayObject *R)
+{
+ const void *Adata = PyArray_DATA(A);
+ void *Rdata = PyArray_DATA(R);
+ int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
+
+ npy_intp i;
+ npy_intp j;
+
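+ /*
+ * cblas_?syrk fills only the upper triangle of R; after each call the
+ * loops below mirror it into the lower triangle to give a full matrix.
+ */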
+ switch (typenum) {
+ case NPY_DOUBLE:
+ cblas_dsyrk(order, CblasUpper, trans, n, k, 1.,
+ Adata, lda, 0., Rdata, ldc);
+
+ for (i = 0; i < n; i++) {
+ for (j = i + 1; j < n; j++) {
+ *((npy_double*)PyArray_GETPTR2(R, j, i)) =
+ *((npy_double*)PyArray_GETPTR2(R, i, j));
+ }
+ }
+ break;
+ case NPY_FLOAT:
+ cblas_ssyrk(order, CblasUpper, trans, n, k, 1.f,
+ Adata, lda, 0.f, Rdata, ldc);
+
+ for (i = 0; i < n; i++) {
+ for (j = i + 1; j < n; j++) {
+ *((npy_float*)PyArray_GETPTR2(R, j, i)) =
+ *((npy_float*)PyArray_GETPTR2(R, i, j));
+ }
+ }
+ break;
+ case NPY_CDOUBLE:
+ cblas_zsyrk(order, CblasUpper, trans, n, k, oneD,
+ Adata, lda, zeroD, Rdata, ldc);
+
+ for (i = 0; i < n; i++) {
+ for (j = i + 1; j < n; j++) {
+ *((npy_cdouble*)PyArray_GETPTR2(R, j, i)) =
+ *((npy_cdouble*)PyArray_GETPTR2(R, i, j));
+ }
+ }
+ break;
+ case NPY_CFLOAT:
+ cblas_csyrk(order, CblasUpper, trans, n, k, oneF,
+ Adata, lda, zeroF, Rdata, ldc);
+
+ for (i = 0; i < n; i++) {
+ for (j = i + 1; j < n; j++) {
+ *((npy_cfloat*)PyArray_GETPTR2(R, j, i)) =
+ *((npy_cfloat*)PyArray_GETPTR2(R, i, j));
+ }
+ }
+ break;
+ }
+}
+
+
+typedef enum {_scalar, _column, _row, _matrix} MatrixShape;
+
+
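+/*
+ * Classify an array of ndim <= 2 for BLAS dispatch: 0-d and
+ * single-element arrays act as scalars, (n,) and (n,1) as columns,
+ * (1,n) as a row, anything else as a matrix.
+ */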
+static MatrixShape
+_select_matrix_shape(PyArrayObject *array)
+{
+ switch (PyArray_NDIM(array)) {
+ case 0:
+ return _scalar;
+ case 1:
+ if (PyArray_DIM(array, 0) > 1)
+ return _column;
+ return _scalar;
+ case 2:
+ if (PyArray_DIM(array, 0) > 1) {
+ if (PyArray_DIM(array, 1) == 1)
+ return _column;
+ else
+ return _matrix;
+ }
+ if (PyArray_DIM(array, 1) == 1)
+ return _scalar;
+ return _row;
+ }
+ return _matrix;
+}
+
+
+/*
+ * Returns 1 if the strides are unsuitable for BLAS (negative, or not
+ * a multiple of the itemsize), or if the data segment is not aligned
+ * to a multiple of the itemsize; returns 0 otherwise.
+ */
+static int
+_bad_strides(PyArrayObject *ap)
+{
+ int itemsize = PyArray_ITEMSIZE(ap);
+ int i, N=PyArray_NDIM(ap);
+ npy_intp *strides = PyArray_STRIDES(ap);
+
+ if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) {
+ return 1;
+ }
+ for (i = 0; i < N; i++) {
+ if ((strides[i] < 0) || (strides[i] % itemsize) != 0) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * dot(a,b)
+ * Returns the dot product of a and b for arrays of floating point types.
+ * Like the generic numpy equivalent the product sum is over
+ * the last dimension of a and the second-to-last dimension of b.
+ * NB: The first argument is not conjugated.
+ *
+ * This is for use by PyArray_MatrixProduct2. It is assumed on entry that
+ * the arrays ap1 and ap2 have a common data type given by typenum that is
+ * float, double, cfloat, or cdouble and have dimension <= 2. The
+ * __array_ufunc__ nonsense is also assumed to have been taken care of.
+ */
+NPY_NO_EXPORT PyObject *
+cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2,
+ PyArrayObject *out)
+{
+ PyArrayObject *result = NULL, *out_buf = NULL;
+ int j, lda, ldb;
+ npy_intp l;
+ int nd;
+ npy_intp ap1stride = 0;
+ npy_intp dimensions[NPY_MAXDIMS];
+ npy_intp numbytes;
+ MatrixShape ap1shape, ap2shape;
+
+ if (_bad_strides(ap1)) {
+ PyObject *op1 = PyArray_NewCopy(ap1, NPY_ANYORDER);
+
+ Py_DECREF(ap1);
+ ap1 = (PyArrayObject *)op1;
+ if (ap1 == NULL) {
+ goto fail;
+ }
+ }
+ if (_bad_strides(ap2)) {
+ PyObject *op2 = PyArray_NewCopy(ap2, NPY_ANYORDER);
+
+ Py_DECREF(ap2);
+ ap2 = (PyArrayObject *)op2;
+ if (ap2 == NULL) {
+ goto fail;
+ }
+ }
+ ap1shape = _select_matrix_shape(ap1);
+ ap2shape = _select_matrix_shape(ap2);
+
+ if (ap1shape == _scalar || ap2shape == _scalar) {
+ PyArrayObject *oap1, *oap2;
+ oap1 = ap1; oap2 = ap2;
+ /* One of ap1 or ap2 is a scalar */
+ if (ap1shape == _scalar) {
+ /* Make ap2 the scalar */
+ PyArrayObject *t = ap1;
+ ap1 = ap2;
+ ap2 = t;
+ ap1shape = ap2shape;
+ ap2shape = _scalar;
+ }
+
+ if (ap1shape == _row) {
+ ap1stride = PyArray_STRIDE(ap1, 1);
+ }
+ else if (PyArray_NDIM(ap1) > 0) {
+ ap1stride = PyArray_STRIDE(ap1, 0);
+ }
+
+ if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
+ npy_intp *thisdims;
+ if (PyArray_NDIM(ap1) == 0) {
+ nd = PyArray_NDIM(ap2);
+ thisdims = PyArray_DIMS(ap2);
+ }
+ else {
+ nd = PyArray_NDIM(ap1);
+ thisdims = PyArray_DIMS(ap1);
+ }
+ l = 1;
+ for (j = 0; j < nd; j++) {
+ dimensions[j] = thisdims[j];
+ l *= dimensions[j];
+ }
+ }
+ else {
+ l = PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1);
+
+ if (PyArray_DIM(oap2, 0) != l) {
+ dot_alignment_error(oap1, PyArray_NDIM(oap1) - 1, oap2, 0);
+ goto fail;
+ }
+ nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;
+ /*
+ * nd = 0 or 1 or 2. If nd == 0 do nothing ...
+ */
+ if (nd == 1) {
+ /*
+ * Either PyArray_NDIM(ap1) is 1 dim or PyArray_NDIM(ap2) is
+ * 1 dim and the other is 2 dim
+ */
+ dimensions[0] = (PyArray_NDIM(oap1) == 2) ?
+ PyArray_DIM(oap1, 0) : PyArray_DIM(oap2, 1);
+ l = dimensions[0];
+ /*
+ * Fix it so that dot(shape=(N,1), shape=(1,))
+ * and dot(shape=(1,), shape=(1,N)) both return
+ * an (N,) array (but use the fast scalar code)
+ */
+ }
+ else if (nd == 2) {
+ dimensions[0] = PyArray_DIM(oap1, 0);
+ dimensions[1] = PyArray_DIM(oap2, 1);
+ /*
+ * We need to make sure that dot(shape=(1,1), shape=(1,N))
+ * and dot(shape=(N,1),shape=(1,1)) uses
+ * scalar multiplication appropriately
+ */
+ if (ap1shape == _row) {
+ l = dimensions[1];
+ }
+ else {
+ l = dimensions[0];
+ }
+ }
+
+ /* Check if the summation dimension is 0-sized */
+ if (PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1) == 0) {
+ l = 0;
+ }
+ }
+ }
+ else {
+ /*
+ * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2)
+ * Both ap1 and ap2 are vectors or matrices
+ */
+ l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1);
+
+ if (PyArray_DIM(ap2, 0) != l) {
+ dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, ap2, 0);
+ goto fail;
+ }
+ nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2;
+
+ if (nd == 1) {
+ dimensions[0] = (PyArray_NDIM(ap1) == 2) ?
+ PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 1);
+ }
+ else if (nd == 2) {
+ dimensions[0] = PyArray_DIM(ap1, 0);
+ dimensions[1] = PyArray_DIM(ap2, 1);
+ }
+ }
+
+ out_buf = new_array_for_sum(ap1, ap2, out, nd, dimensions, typenum, &result);
+ if (out_buf == NULL) {
+ goto fail;
+ }
+
+ numbytes = PyArray_NBYTES(out_buf);
+ memset(PyArray_DATA(out_buf), 0, numbytes);
+ if (numbytes == 0 || l == 0) {
+ Py_DECREF(ap1);
+ Py_DECREF(ap2);
+ Py_DECREF(out_buf);
+ return PyArray_Return(result);
+ }
+
+ if (ap2shape == _scalar) {
+ /*
+ * Multiplication by a scalar -- Level 1 BLAS
+ * if ap1shape is a matrix and we are not contiguous, then we can't
+ * just blast through the entire array using a single striding factor
+ */
+ NPY_BEGIN_ALLOW_THREADS;
+
+ if (typenum == NPY_DOUBLE) {
+ if (l == 1) {
+ *((double *)PyArray_DATA(out_buf)) = *((double *)PyArray_DATA(ap2)) *
+ *((double *)PyArray_DATA(ap1));
+ }
+ else if (ap1shape != _matrix) {
+ cblas_daxpy(l,
+ *((double *)PyArray_DATA(ap2)),
+ (double *)PyArray_DATA(ap1),
+ ap1stride/sizeof(double),
+ (double *)PyArray_DATA(out_buf), 1);
+ }
+ else {
+ int maxind, oind, i, a1s, outs;
+ char *ptr, *optr;
+ double val;
+
+ maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1);
+ oind = 1 - maxind;
+ ptr = PyArray_DATA(ap1);
+ optr = PyArray_DATA(out_buf);
+ l = PyArray_DIM(ap1, maxind);
+ val = *((double *)PyArray_DATA(ap2));
+ a1s = PyArray_STRIDE(ap1, maxind) / sizeof(double);
+ outs = PyArray_STRIDE(out_buf, maxind) / sizeof(double);
+ for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
+ cblas_daxpy(l, val, (double *)ptr, a1s,
+ (double *)optr, outs);
+ ptr += PyArray_STRIDE(ap1, oind);
+ optr += PyArray_STRIDE(out_buf, oind);
+ }
+ }
+ }
+ else if (typenum == NPY_CDOUBLE) {
+ if (l == 1) {
+ npy_cdouble *ptr1, *ptr2, *res;
+
+ ptr1 = (npy_cdouble *)PyArray_DATA(ap2);
+ ptr2 = (npy_cdouble *)PyArray_DATA(ap1);
+ res = (npy_cdouble *)PyArray_DATA(out_buf);
+ res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag;
+ res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
+ }
+ else if (ap1shape != _matrix) {
+ cblas_zaxpy(l,
+ (double *)PyArray_DATA(ap2),
+ (double *)PyArray_DATA(ap1),
+ ap1stride/sizeof(npy_cdouble),
+ (double *)PyArray_DATA(out_buf), 1);
+ }
+ else {
+ int maxind, oind, i, a1s, outs;
+ char *ptr, *optr;
+ double *pval;
+
+ maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1);
+ oind = 1 - maxind;
+ ptr = PyArray_DATA(ap1);
+ optr = PyArray_DATA(out_buf);
+ l = PyArray_DIM(ap1, maxind);
+ pval = (double *)PyArray_DATA(ap2);
+ a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cdouble);
+ outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cdouble);
+ for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
+ cblas_zaxpy(l, pval, (double *)ptr, a1s,
+ (double *)optr, outs);
+ ptr += PyArray_STRIDE(ap1, oind);
+ optr += PyArray_STRIDE(out_buf, oind);
+ }
+ }
+ }
+ else if (typenum == NPY_FLOAT) {
+ if (l == 1) {
+ *((float *)PyArray_DATA(out_buf)) = *((float *)PyArray_DATA(ap2)) *
+ *((float *)PyArray_DATA(ap1));
+ }
+ else if (ap1shape != _matrix) {
+ cblas_saxpy(l,
+ *((float *)PyArray_DATA(ap2)),
+ (float *)PyArray_DATA(ap1),
+ ap1stride/sizeof(float),
+ (float *)PyArray_DATA(out_buf), 1);
+ }
+ else {
+ int maxind, oind, i, a1s, outs;
+ char *ptr, *optr;
+ float val;
+
+ maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1);
+ oind = 1 - maxind;
+ ptr = PyArray_DATA(ap1);
+ optr = PyArray_DATA(out_buf);
+ l = PyArray_DIM(ap1, maxind);
+ val = *((float *)PyArray_DATA(ap2));
+ a1s = PyArray_STRIDE(ap1, maxind) / sizeof(float);
+ outs = PyArray_STRIDE(out_buf, maxind) / sizeof(float);
+ for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
+ cblas_saxpy(l, val, (float *)ptr, a1s,
+ (float *)optr, outs);
+ ptr += PyArray_STRIDE(ap1, oind);
+ optr += PyArray_STRIDE(out_buf, oind);
+ }
+ }
+ }
+ else if (typenum == NPY_CFLOAT) {
+ if (l == 1) {
+ npy_cfloat *ptr1, *ptr2, *res;
+
+ ptr1 = (npy_cfloat *)PyArray_DATA(ap2);
+ ptr2 = (npy_cfloat *)PyArray_DATA(ap1);
+ res = (npy_cfloat *)PyArray_DATA(out_buf);
+ res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag;
+ res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real;
+ }
+ else if (ap1shape != _matrix) {
+ cblas_caxpy(l,
+ (float *)PyArray_DATA(ap2),
+ (float *)PyArray_DATA(ap1),
+ ap1stride/sizeof(npy_cfloat),
+ (float *)PyArray_DATA(out_buf), 1);
+ }
+ else {
+ int maxind, oind, i, a1s, outs;
+ char *ptr, *optr;
+ float *pval;
+
+ maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1);
+ oind = 1 - maxind;
+ ptr = PyArray_DATA(ap1);
+ optr = PyArray_DATA(out_buf);
+ l = PyArray_DIM(ap1, maxind);
+ pval = (float *)PyArray_DATA(ap2);
+ a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cfloat);
+ outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cfloat);
+ for (i = 0; i < PyArray_DIM(ap1, oind); i++) {
+ cblas_caxpy(l, pval, (float *)ptr, a1s,
+ (float *)optr, outs);
+ ptr += PyArray_STRIDE(ap1, oind);
+ optr += PyArray_STRIDE(out_buf, oind);
+ }
+ }
+ }
+ NPY_END_ALLOW_THREADS;
+ }
+ else if ((ap2shape == _column) && (ap1shape != _matrix)) {
+ NPY_BEGIN_ALLOW_THREADS;
+
+ /* Dot product between two vectors -- Level 1 BLAS */
+ PyArray_DESCR(out_buf)->f->dotfunc(
+ PyArray_DATA(ap1), PyArray_STRIDE(ap1, (ap1shape == _row)),
+ PyArray_DATA(ap2), PyArray_STRIDE(ap2, 0),
+ PyArray_DATA(out_buf), l, NULL);
+ NPY_END_ALLOW_THREADS;
+ }
+ else if (ap1shape == _matrix && ap2shape != _matrix) {
+ /* Matrix vector multiplication -- Level 2 BLAS */
+ /* lda must be MAX(M,1) */
+ enum CBLAS_ORDER Order;
+ int ap2s;
+
+ if (!PyArray_ISONESEGMENT(ap1)) {
+ PyObject *new;
+ new = PyArray_Copy(ap1);
+ Py_DECREF(ap1);
+ ap1 = (PyArrayObject *)new;
+ if (new == NULL) {
+ goto fail;
+ }
+ }
+ NPY_BEGIN_ALLOW_THREADS
+ if (PyArray_ISCONTIGUOUS(ap1)) {
+ Order = CblasRowMajor;
+ lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
+ }
+ else {
+ Order = CblasColMajor;
+ lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1);
+ }
+ ap2s = PyArray_STRIDE(ap2, 0) / PyArray_ITEMSIZE(ap2);
+ gemv(typenum, Order, CblasNoTrans, ap1, lda, ap2, ap2s, out_buf);
+ NPY_END_ALLOW_THREADS;
+ }
+ else if (ap1shape != _matrix && ap2shape == _matrix) {
+ /* Vector matrix multiplication -- Level 2 BLAS */
+ enum CBLAS_ORDER Order;
+ int ap1s;
+
+ if (!PyArray_ISONESEGMENT(ap2)) {
+ PyObject *new;
+ new = PyArray_Copy(ap2);
+ Py_DECREF(ap2);
+ ap2 = (PyArrayObject *)new;
+ if (new == NULL) {
+ goto fail;
+ }
+ }
+ NPY_BEGIN_ALLOW_THREADS
+ if (PyArray_ISCONTIGUOUS(ap2)) {
+ Order = CblasRowMajor;
+ lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
+ }
+ else {
+ Order = CblasColMajor;
+ lda = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1);
+ }
+ if (ap1shape == _row) {
+ ap1s = PyArray_STRIDE(ap1, 1) / PyArray_ITEMSIZE(ap1);
+ }
+ else {
+ ap1s = PyArray_STRIDE(ap1, 0) / PyArray_ITEMSIZE(ap1);
+ }
+ gemv(typenum, Order, CblasTrans, ap2, lda, ap1, ap1s, out_buf);
+ NPY_END_ALLOW_THREADS;
+ }
+ else {
+ /*
+ * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2)
+ * Matrix matrix multiplication -- Level 3 BLAS
+ * L x M multiplied by M x N
+ */
+ enum CBLAS_ORDER Order;
+ enum CBLAS_TRANSPOSE Trans1, Trans2;
+ int M, N, L;
+
+ /*
+ * Optimization possible: we may be able to handle single-segment
+ * arrays here using appropriate values of Order, Trans1, and Trans2.
+ */
+ if (!PyArray_IS_C_CONTIGUOUS(ap2) && !PyArray_IS_F_CONTIGUOUS(ap2)) {
+ PyObject *new = PyArray_Copy(ap2);
+
+ Py_DECREF(ap2);
+ ap2 = (PyArrayObject *)new;
+ if (new == NULL) {
+ goto fail;
+ }
+ }
+ if (!PyArray_IS_C_CONTIGUOUS(ap1) && !PyArray_IS_F_CONTIGUOUS(ap1)) {
+ PyObject *new = PyArray_Copy(ap1);
+
+ Py_DECREF(ap1);
+ ap1 = (PyArrayObject *)new;
+ if (new == NULL) {
+ goto fail;
+ }
+ }
+
+ NPY_BEGIN_ALLOW_THREADS;
+
+ Order = CblasRowMajor;
+ Trans1 = CblasNoTrans;
+ Trans2 = CblasNoTrans;
+ L = PyArray_DIM(ap1, 0);
+ N = PyArray_DIM(ap2, 1);
+ M = PyArray_DIM(ap2, 0);
+ lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
+ ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
+
+ /*
+ * Avoid temporary copies for arrays in Fortran order
+ */
+ if (PyArray_IS_F_CONTIGUOUS(ap1)) {
+ Trans1 = CblasTrans;
+ lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1);
+ }
+ if (PyArray_IS_F_CONTIGUOUS(ap2)) {
+ Trans2 = CblasTrans;
+ ldb = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1);
+ }
+
+ /*
+ * Use syrk if we have a case of a matrix times its transpose.
+ * Otherwise, use gemm for all other cases.
+ */
+ if (
+ (PyArray_BYTES(ap1) == PyArray_BYTES(ap2)) &&
+ (PyArray_DIM(ap1, 0) == PyArray_DIM(ap2, 1)) &&
+ (PyArray_DIM(ap1, 1) == PyArray_DIM(ap2, 0)) &&
+ (PyArray_STRIDE(ap1, 0) == PyArray_STRIDE(ap2, 1)) &&
+ (PyArray_STRIDE(ap1, 1) == PyArray_STRIDE(ap2, 0)) &&
+ ((Trans1 == CblasTrans) ^ (Trans2 == CblasTrans)) &&
+ ((Trans1 == CblasNoTrans) ^ (Trans2 == CblasNoTrans))
+ ) {
+ if (Trans1 == CblasNoTrans) {
+ syrk(typenum, Order, Trans1, N, M, ap1, lda, out_buf);
+ }
+ else {
+ syrk(typenum, Order, Trans1, N, M, ap2, ldb, out_buf);
+ }
+ }
+ else {
+ gemm(typenum, Order, Trans1, Trans2, L, N, M, ap1, lda, ap2, ldb,
+ out_buf);
+ }
+ NPY_END_ALLOW_THREADS;
+ }
+
+
+ Py_DECREF(ap1);
+ Py_DECREF(ap2);
+
+ /* Trigger possible copyback into `result` */
+ PyArray_ResolveWritebackIfCopy(out_buf);
+ Py_DECREF(out_buf);
+
+ return PyArray_Return(result);
+
+fail:
+ Py_XDECREF(ap1);
+ Py_XDECREF(ap2);
+ Py_XDECREF(out_buf);
+ Py_XDECREF(result);
+ return NULL;
+}
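A note on the syrk dispatch above: it fires only when the second operand is a
transposed view of the first (same data pointer, dims and strides swapped, and
exactly one side flagged CblasTrans), i.e. products of the form A*A^T or A^T*A.
A hedged sketch of that predicate over a hypothetical minimal descriptor:

    #include <stddef.h>

    /* Hypothetical 2-d descriptor, just enough to express the test. */
    typedef struct {
        char *data;
        ptrdiff_t dim[2], stride[2];
    } mat2d;

    /* b is a transposed view of a when they share a data pointer and
     * b's dims/strides are a's, swapped. cblas_matrixproduct() combines
     * this with "exactly one operand is CblasTrans" before using syrk. */
    static int
    is_transpose_view(const mat2d *a, const mat2d *b)
    {
        return a->data == b->data
            && a->dim[0] == b->dim[1] && a->dim[1] == b->dim[0]
            && a->stride[0] == b->stride[1] && a->stride[1] == b->stride[0];
    }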
diff --git a/numpy/core/src/common/cblasfuncs.h b/numpy/core/src/common/cblasfuncs.h
new file mode 100644
index 000000000..66ce4ca5b
--- /dev/null
+++ b/numpy/core/src/common/cblasfuncs.h
@@ -0,0 +1,7 @@
+#ifndef _NPY_CBLASFUNCS_H_
+#define _NPY_CBLASFUNCS_H_
+
+NPY_NO_EXPORT PyObject *
+cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *);
+
+#endif
diff --git a/numpy/core/src/common/python_xerbla.c b/numpy/core/src/common/python_xerbla.c
new file mode 100644
index 000000000..bdf0b9058
--- /dev/null
+++ b/numpy/core/src/common/python_xerbla.c
@@ -0,0 +1,51 @@
+#include "Python.h"
+
+/*
+ * From f2c.h, this should be safe unless fortran is set to use 64
+ * bit integers. We don't seem to have any good way to detect that.
+ */
+typedef int integer;
+
+/*
+ From the original manpage:
+ --------------------------
+ XERBLA is an error handler for the LAPACK routines.
+ It is called by an LAPACK routine if an input parameter has an invalid value.
+ A message is printed and execution stops.
+
+ Instead of printing a message and stopping the execution, a
+ ValueError is raised with the message.
+
+ Parameters:
+ -----------
+ srname: Subroutine name to use in error message, maximum six characters.
+ Spaces at the end are skipped.
+ info: Number of the invalid parameter.
+*/
+
+int xerbla_(char *srname, integer *info)
+{
+ static const char format[] = "On entry to %.*s" \
+ " parameter number %d had an illegal value";
+ char buf[sizeof(format) + 6 + 4]; /* 6 for name, 4 for param. num. */
+
+ int len = 0; /* length of subroutine name */
+#ifdef WITH_THREAD
+ PyGILState_STATE save;
+#endif
+
+ while( len<6 && srname[len]!='\0' )
+ len++;
+ while( len && srname[len-1]==' ' )
+ len--;
+#ifdef WITH_THREAD
+ save = PyGILState_Ensure();
+#endif
+ PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info);
+ PyErr_SetString(PyExc_ValueError, buf);
+#ifdef WITH_THREAD
+ PyGILState_Release(save);
+#endif
+
+ return 0;
+}
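The scan in xerbla_() accepts Fortran's fixed-width, blank-padded routine names:
read at most six characters, then drop trailing spaces. A standalone sketch of
the same scan (hypothetical test harness, not part of the patch):

    #include <assert.h>
    #include <stdio.h>

    /* Mirror xerbla_'s name scan: at most 6 characters, minus padding. */
    static int
    routine_name_len(const char *srname)
    {
        int len = 0;

        while (len < 6 && srname[len] != '\0')
            len++;
        while (len && srname[len - 1] == ' ')
            len--;
        return len;
    }

    int main(void)
    {
        assert(routine_name_len("DGEMM ") == 5);
        assert(routine_name_len("ZGESVX") == 6);
        printf("On entry to %.*s parameter number %d had an illegal value\n",
               routine_name_len("DGEMM "), "DGEMM ", 4);
        return 0;
    }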
diff --git a/numpy/core/src/common/ucsnarrow.c b/numpy/core/src/common/ucsnarrow.c
new file mode 100644
index 000000000..8e293e9f2
--- /dev/null
+++ b/numpy/core/src/common/ucsnarrow.c
@@ -0,0 +1,174 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#include <locale.h>
+#include <stdio.h>
+
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"
+#include "numpy/npy_math.h"
+
+#include "npy_config.h"
+
+#include "npy_pycompat.h"
+#include "ctors.h"
+
+/*
+ * Functions only needed on narrow builds of Python for converting back and
+ * forth between the NumPy Unicode data-type (always 4-bytes) and the
+ * Python Unicode scalar (2-bytes on a narrow build).
+ */
+
+/*
+ * The ucs2 buffer must be large enough to hold 2*ucs4length characters
+ * due to the use of surrogate pairs.
+ *
+ * The return value is the number of UCS2 code units written, which
+ * is ucs4length plus the number of surrogate pairs found.
+ *
+ * Values above 0xffff are converted to surrogate pairs.
+ */
+NPY_NO_EXPORT int
+PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length)
+{
+ int i;
+ int numucs2 = 0;
+ npy_ucs4 chr;
+ for (i = 0; i < ucs4length; i++) {
+ chr = *ucs4++;
+ if (chr > 0xffff) {
+ numucs2++;
+ chr -= 0x10000L;
+ *ucs2++ = 0xD800 + (Py_UNICODE) (chr >> 10);
+ *ucs2++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
+ }
+ else {
+ *ucs2++ = (Py_UNICODE) chr;
+ }
+ numucs2++;
+ }
+ return numucs2;
+}
+
+
+/*
+ * This converts a UCS2 buffer of the given length to a UCS4 buffer,
+ * writing at most ucs4len UCS4 characters.
+ *
+ * It returns the number of characters converted, which can be less
+ * than ucs2len if there are surrogate pairs in ucs2; the return value
+ * is the actual size of the used part of the ucs4 buffer.
+ */
+NPY_NO_EXPORT int
+PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len)
+{
+ int i;
+ npy_ucs4 chr;
+ Py_UNICODE ch;
+ int numchars=0;
+
+ for (i = 0; (i < ucs2len) && (numchars < ucs4len); i++) {
+ ch = *ucs2++;
+ if (ch >= 0xd800 && ch <= 0xdfff) {
+ /* surrogate pair */
+ chr = ((npy_ucs4)(ch-0xd800)) << 10;
+ chr += *ucs2++ + 0x2400; /* -0xdc00 + 0x10000 */
+ i++;
+ }
+ else {
+ chr = (npy_ucs4) ch;
+ }
+ *ucs4++ = chr;
+ numchars++;
+ }
+ return numchars;
+}
+
+/*
+ * Returns a PyUnicodeObject initialized from a buffer containing
+ * UCS4 unicode.
+ *
+ * Parameters
+ * ----------
+ * src: char *
+ * Pointer to buffer containing UCS4 unicode.
+ * size: Py_ssize_t
+ * Size of buffer in bytes.
+ * swap: int
+ * If true, the data will be swapped.
+ * align: int
+ * If true, the data will be aligned.
+ *
+ * Returns
+ * -------
+ * new_reference: PyUnicodeObject
+ */
+NPY_NO_EXPORT PyUnicodeObject *
+PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align)
+{
+ Py_ssize_t ucs4len = size / sizeof(npy_ucs4);
+ npy_ucs4 *buf = (npy_ucs4 *)src;
+ int alloc = 0;
+ PyUnicodeObject *ret;
+
+ /* swap and align if needed */
+ if (swap || align) {
+ buf = (npy_ucs4 *)malloc(size);
+ if (buf == NULL) {
+ PyErr_NoMemory();
+ goto fail;
+ }
+ alloc = 1;
+ memcpy(buf, src, size);
+ if (swap) {
+ byte_swap_vector(buf, ucs4len, sizeof(npy_ucs4));
+ }
+ }
+
+ /* trim trailing zeros */
+ while (ucs4len > 0 && buf[ucs4len - 1] == 0) {
+ ucs4len--;
+ }
+
+ /* produce PyUnicode object */
+#ifdef Py_UNICODE_WIDE
+ {
+ ret = (PyUnicodeObject *)PyUnicode_FromUnicode((Py_UNICODE*)buf,
+ (Py_ssize_t) ucs4len);
+ if (ret == NULL) {
+ goto fail;
+ }
+ }
+#else
+ {
+ Py_ssize_t tmpsiz = 2 * sizeof(Py_UNICODE) * ucs4len;
+ Py_ssize_t ucs2len;
+ Py_UNICODE *tmp;
+
+ if ((tmp = (Py_UNICODE *)malloc(tmpsiz)) == NULL) {
+ PyErr_NoMemory();
+ goto fail;
+ }
+ ucs2len = PyUCS2Buffer_FromUCS4(tmp, buf, ucs4len);
+ ret = (PyUnicodeObject *)PyUnicode_FromUnicode(tmp, (Py_ssize_t) ucs2len);
+ free(tmp);
+ if (ret == NULL) {
+ goto fail;
+ }
+ }
+#endif
+
+ if (alloc) {
+ free(buf);
+ }
+ return ret;
+
+fail:
+ if (alloc) {
+ free(buf);
+ }
+ return NULL;
+}
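The surrogate-pair arithmetic in PyUCS2Buffer_FromUCS4()/PyUCS2Buffer_AsUCS4()
follows the usual UTF-16 rules: subtract 0x10000, put the high ten bits in a
0xD800-based lead unit and the low ten bits in a 0xDC00-based trail unit; the
decoder's magic 0x2400 folds -0xDC00 and +0x10000 into one constant. A
round-trip sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t chr = 0x1F600;             /* any code point above 0xFFFF */

        /* encode, as in PyUCS2Buffer_FromUCS4 */
        uint32_t v = chr - 0x10000;
        uint16_t lead  = 0xD800 + (uint16_t)(v >> 10);
        uint16_t trail = 0xDC00 + (uint16_t)(v & 0x03FF);

        /* decode, as in PyUCS2Buffer_AsUCS4: 0x2400 == 0x10000 - 0xDC00 */
        uint32_t back = (((uint32_t)(lead - 0xD800)) << 10) + trail + 0x2400;
        assert(back == chr);
        return 0;
    }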
diff --git a/numpy/core/src/common/ucsnarrow.h b/numpy/core/src/common/ucsnarrow.h
new file mode 100644
index 000000000..fe31a5e25
--- /dev/null
+++ b/numpy/core/src/common/ucsnarrow.h
@@ -0,0 +1,13 @@
+#ifndef _NPY_UCSNARROW_H_
+#define _NPY_UCSNARROW_H_
+
+NPY_NO_EXPORT int
+PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length);
+
+NPY_NO_EXPORT int
+PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len);
+
+NPY_NO_EXPORT PyUnicodeObject *
+PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align);
+
+#endif