author    | mattip <matti.picus@gmail.com> | 2018-06-25 12:35:18 -0700
committer | mattip <matti.picus@gmail.com> | 2018-08-21 20:06:17 +0300
commit    | 1bca48bb13dff1324078ca7dd968623c2a7f923f (patch)
tree      | 86bd3d7ffbd8a0bb68bfbb91fa2f6a06ccb6528c /numpy/core/src/common
parent    | bd4500fa91456230ac9d74c704956788fe3e6ac7 (diff)
download  | numpy-1bca48bb13dff1324078ca7dd968623c2a7f923f.tar.gz
MAINT: move unchanging, non-api files to common
Diffstat (limited to 'numpy/core/src/common')
-rw-r--r-- | numpy/core/src/common/array_assign.c  | 118
-rw-r--r-- | numpy/core/src/common/array_assign.h  | 100
-rw-r--r-- | numpy/core/src/common/cblasfuncs.c    | 693
-rw-r--r-- | numpy/core/src/common/cblasfuncs.h    |   7
-rw-r--r-- | numpy/core/src/common/python_xerbla.c |  51
-rw-r--r-- | numpy/core/src/common/ucsnarrow.c     | 174
-rw-r--r-- | numpy/core/src/common/ucsnarrow.h     |  13
7 files changed, 1156 insertions, 0 deletions
diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c new file mode 100644 index 000000000..a48e245d8 --- /dev/null +++ b/numpy/core/src/common/array_assign.c @@ -0,0 +1,118 @@ +/* + * This file implements some helper functions for the array assignment + * routines. The actual assignment routines are in array_assign_*.c + * + * Written by Mark Wiebe (mwwiebe@gmail.com) + * Copyright (c) 2011 by Enthought, Inc. + * + * See LICENSE.txt for the license. + */ + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include <numpy/ndarraytypes.h> + +#include "npy_config.h" +#include "npy_pycompat.h" + +#include "shape.h" + +#include "array_assign.h" +#include "common.h" +#include "lowlevel_strided_loops.h" +#include "mem_overlap.h" + +/* See array_assign.h for parameter documentation */ +NPY_NO_EXPORT int +broadcast_strides(int ndim, npy_intp *shape, + int strides_ndim, npy_intp *strides_shape, npy_intp *strides, + char *strides_name, + npy_intp *out_strides) +{ + int idim, idim_start = ndim - strides_ndim; + + /* Can't broadcast to fewer dimensions */ + if (idim_start < 0) { + goto broadcast_error; + } + + /* + * Process from the end to the start, so that 'strides' and 'out_strides' + * can point to the same memory. + */ + for (idim = ndim - 1; idim >= idim_start; --idim) { + npy_intp strides_shape_value = strides_shape[idim - idim_start]; + /* If it doesn't have dimension one, it must match */ + if (strides_shape_value == 1) { + out_strides[idim] = 0; + } + else if (strides_shape_value != shape[idim]) { + goto broadcast_error; + } + else { + out_strides[idim] = strides[idim - idim_start]; + } + } + + /* New dimensions get a zero stride */ + for (idim = 0; idim < idim_start; ++idim) { + out_strides[idim] = 0; + } + + return 0; + +broadcast_error: { + PyObject *errmsg; + + errmsg = PyUString_FromFormat("could not broadcast %s from shape ", + strides_name); + PyUString_ConcatAndDel(&errmsg, + build_shape_string(strides_ndim, strides_shape)); + PyUString_ConcatAndDel(&errmsg, + PyUString_FromString(" into shape ")); + PyUString_ConcatAndDel(&errmsg, + build_shape_string(ndim, shape)); + PyErr_SetObject(PyExc_ValueError, errmsg); + Py_DECREF(errmsg); + + return -1; + } +} + +/* See array_assign.h for parameter documentation */ +NPY_NO_EXPORT int +raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment) +{ + if (alignment > 1) { + npy_intp align_check = (npy_intp)data; + int idim; + + for (idim = 0; idim < ndim; ++idim) { + align_check |= strides[idim]; + } + + return npy_is_aligned((void *)align_check, alignment); + } + else { + return 1; + } +} + + +/* Returns 1 if the arrays have overlapping data, 0 otherwise */ +NPY_NO_EXPORT int +arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) +{ + mem_overlap_t result; + + result = solve_may_share_memory(arr1, arr2, NPY_MAY_SHARE_BOUNDS); + if (result == MEM_OVERLAP_NO) { + return 0; + } + else { + return 1; + } +} diff --git a/numpy/core/src/common/array_assign.h b/numpy/core/src/common/array_assign.h new file mode 100644 index 000000000..3fecff007 --- /dev/null +++ b/numpy/core/src/common/array_assign.h @@ -0,0 +1,100 @@ +#ifndef _NPY_PRIVATE__ARRAY_ASSIGN_H_ +#define _NPY_PRIVATE__ARRAY_ASSIGN_H_ + +/* + * An array assignment function for copying arrays, treating the + * arrays as flat according to their respective ordering rules. 
+ * This function makes a temporary copy of 'src' if 'src' and + * 'dst' overlap, to be able to handle views of the same data with + * different strides. + * + * dst: The destination array. + * dst_order: The rule for how 'dst' is to be made flat. + * src: The source array. + * src_order: The rule for how 'src' is to be made flat. + * casting: An exception is raised if the copy violates this + * casting rule. + * + * Returns 0 on success, -1 on failure. + */ +/* Not yet implemented +NPY_NO_EXPORT int +PyArray_AssignArrayAsFlat(PyArrayObject *dst, NPY_ORDER dst_order, + PyArrayObject *src, NPY_ORDER src_order, + NPY_CASTING casting, + npy_bool preservena, npy_bool *preservewhichna); +*/ + +NPY_NO_EXPORT int +PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src, + PyArrayObject *wheremask, + NPY_CASTING casting); + +NPY_NO_EXPORT int +PyArray_AssignRawScalar(PyArrayObject *dst, + PyArray_Descr *src_dtype, char *src_data, + PyArrayObject *wheremask, + NPY_CASTING casting); + +/******** LOW-LEVEL SCALAR TO ARRAY ASSIGNMENT ********/ + +/* + * Assigns the scalar value to every element of the destination raw array. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +raw_array_assign_scalar(int ndim, npy_intp *shape, + PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides, + PyArray_Descr *src_dtype, char *src_data); + +/* + * Assigns the scalar value to every element of the destination raw array + * where the 'wheremask' value is True. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape, + PyArray_Descr *dst_dtype, char *dst_data, npy_intp *dst_strides, + PyArray_Descr *src_dtype, char *src_data, + PyArray_Descr *wheremask_dtype, char *wheremask_data, + npy_intp *wheremask_strides); + +/******** LOW-LEVEL ARRAY MANIPULATION HELPERS ********/ + +/* + * Internal detail of how much to buffer during array assignments which + * need it. This is for more complex NA masking operations where masks + * need to be inverted or combined together. + */ +#define NPY_ARRAY_ASSIGN_BUFFERSIZE 8192 + +/* + * Broadcasts strides to match the given dimensions. Can be used, + * for instance, to set up a raw iteration. + * + * 'strides_name' is used to produce an error message if the strides + * cannot be broadcast. + * + * Returns 0 on success, -1 on failure. + */ +NPY_NO_EXPORT int +broadcast_strides(int ndim, npy_intp *shape, + int strides_ndim, npy_intp *strides_shape, npy_intp *strides, + char *strides_name, + npy_intp *out_strides); + +/* + * Checks whether a data pointer + set of strides refers to a raw + * array which is fully aligned data. 
+ */ +NPY_NO_EXPORT int +raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment); + +/* Returns 1 if the arrays have overlapping data, 0 otherwise */ +NPY_NO_EXPORT int +arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2); + + +#endif diff --git a/numpy/core/src/common/cblasfuncs.c b/numpy/core/src/common/cblasfuncs.c new file mode 100644 index 000000000..6460c5db1 --- /dev/null +++ b/numpy/core/src/common/cblasfuncs.c @@ -0,0 +1,693 @@ +/* + * This module provides a BLAS optimized matrix multiply, + * inner product and dot for numpy arrays + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#include <Python.h> +#include <assert.h> +#include <numpy/arrayobject.h> +#include "npy_cblas.h" +#include "arraytypes.h" +#include "common.h" + + +static const double oneD[2] = {1.0, 0.0}, zeroD[2] = {0.0, 0.0}; +static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0}; + + +/* + * Helper: dispatch to appropriate cblas_?gemm for typenum. + */ +static void +gemm(int typenum, enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB, + int m, int n, int k, + PyArrayObject *A, int lda, PyArrayObject *B, int ldb, PyArrayObject *R) +{ + const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B); + void *Rdata = PyArray_DATA(R); + int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1; + + switch (typenum) { + case NPY_DOUBLE: + cblas_dgemm(order, transA, transB, m, n, k, 1., + Adata, lda, Bdata, ldb, 0., Rdata, ldc); + break; + case NPY_FLOAT: + cblas_sgemm(order, transA, transB, m, n, k, 1.f, + Adata, lda, Bdata, ldb, 0.f, Rdata, ldc); + break; + case NPY_CDOUBLE: + cblas_zgemm(order, transA, transB, m, n, k, oneD, + Adata, lda, Bdata, ldb, zeroD, Rdata, ldc); + break; + case NPY_CFLOAT: + cblas_cgemm(order, transA, transB, m, n, k, oneF, + Adata, lda, Bdata, ldb, zeroF, Rdata, ldc); + break; + } +} + + +/* + * Helper: dispatch to appropriate cblas_?gemv for typenum. + */ +static void +gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, + PyArrayObject *A, int lda, PyArrayObject *X, int incX, + PyArrayObject *R) +{ + const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X); + void *Rdata = PyArray_DATA(R); + + int m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1); + + switch (typenum) { + case NPY_DOUBLE: + cblas_dgemv(order, trans, m, n, 1., Adata, lda, Xdata, incX, + 0., Rdata, 1); + break; + case NPY_FLOAT: + cblas_sgemv(order, trans, m, n, 1.f, Adata, lda, Xdata, incX, + 0.f, Rdata, 1); + break; + case NPY_CDOUBLE: + cblas_zgemv(order, trans, m, n, oneD, Adata, lda, Xdata, incX, + zeroD, Rdata, 1); + break; + case NPY_CFLOAT: + cblas_cgemv(order, trans, m, n, oneF, Adata, lda, Xdata, incX, + zeroF, Rdata, 1); + break; + } +} + + +/* + * Helper: dispatch to appropriate cblas_?syrk for typenum. + */ +static void +syrk(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, + int n, int k, + PyArrayObject *A, int lda, PyArrayObject *R) +{ + const void *Adata = PyArray_DATA(A); + void *Rdata = PyArray_DATA(R); + int ldc = PyArray_DIM(R, 1) > 1 ? 
PyArray_DIM(R, 1) : 1; + + npy_intp i; + npy_intp j; + + switch (typenum) { + case NPY_DOUBLE: + cblas_dsyrk(order, CblasUpper, trans, n, k, 1., + Adata, lda, 0., Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_double*)PyArray_GETPTR2(R, j, i)) = + *((npy_double*)PyArray_GETPTR2(R, i, j)); + } + } + break; + case NPY_FLOAT: + cblas_ssyrk(order, CblasUpper, trans, n, k, 1.f, + Adata, lda, 0.f, Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_float*)PyArray_GETPTR2(R, j, i)) = + *((npy_float*)PyArray_GETPTR2(R, i, j)); + } + } + break; + case NPY_CDOUBLE: + cblas_zsyrk(order, CblasUpper, trans, n, k, oneD, + Adata, lda, zeroD, Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_cdouble*)PyArray_GETPTR2(R, j, i)) = + *((npy_cdouble*)PyArray_GETPTR2(R, i, j)); + } + } + break; + case NPY_CFLOAT: + cblas_csyrk(order, CblasUpper, trans, n, k, oneF, + Adata, lda, zeroF, Rdata, ldc); + + for (i = 0; i < n; i++) { + for (j = i + 1; j < n; j++) { + *((npy_cfloat*)PyArray_GETPTR2(R, j, i)) = + *((npy_cfloat*)PyArray_GETPTR2(R, i, j)); + } + } + break; + } +} + + +typedef enum {_scalar, _column, _row, _matrix} MatrixShape; + + +static MatrixShape +_select_matrix_shape(PyArrayObject *array) +{ + switch (PyArray_NDIM(array)) { + case 0: + return _scalar; + case 1: + if (PyArray_DIM(array, 0) > 1) + return _column; + return _scalar; + case 2: + if (PyArray_DIM(array, 0) > 1) { + if (PyArray_DIM(array, 1) == 1) + return _column; + else + return _matrix; + } + if (PyArray_DIM(array, 1) == 1) + return _scalar; + return _row; + } + return _matrix; +} + + +/* + * This also makes sure that the data segment is aligned with + * an itemsize address as well by returning one if not true. + */ +static int +_bad_strides(PyArrayObject *ap) +{ + int itemsize = PyArray_ITEMSIZE(ap); + int i, N=PyArray_NDIM(ap); + npy_intp *strides = PyArray_STRIDES(ap); + + if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) { + return 1; + } + for (i = 0; i < N; i++) { + if ((strides[i] < 0) || (strides[i] % itemsize) != 0) { + return 1; + } + } + + return 0; +} + + +/* + * dot(a,b) + * Returns the dot product of a and b for arrays of floating point types. + * Like the generic numpy equivalent the product sum is over + * the last dimension of a and the second-to-last dimension of b. + * NB: The first argument is not conjugated.; + * + * This is for use by PyArray_MatrixProduct2. It is assumed on entry that + * the arrays ap1 and ap2 have a common data type given by typenum that is + * float, double, cfloat, or cdouble and have dimension <= 2. The + * __array_ufunc__ nonsense is also assumed to have been taken care of. 
+ */ +NPY_NO_EXPORT PyObject * +cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2, + PyArrayObject *out) +{ + PyArrayObject *result = NULL, *out_buf = NULL; + int j, lda, ldb; + npy_intp l; + int nd; + npy_intp ap1stride = 0; + npy_intp dimensions[NPY_MAXDIMS]; + npy_intp numbytes; + MatrixShape ap1shape, ap2shape; + + if (_bad_strides(ap1)) { + PyObject *op1 = PyArray_NewCopy(ap1, NPY_ANYORDER); + + Py_DECREF(ap1); + ap1 = (PyArrayObject *)op1; + if (ap1 == NULL) { + goto fail; + } + } + if (_bad_strides(ap2)) { + PyObject *op2 = PyArray_NewCopy(ap2, NPY_ANYORDER); + + Py_DECREF(ap2); + ap2 = (PyArrayObject *)op2; + if (ap2 == NULL) { + goto fail; + } + } + ap1shape = _select_matrix_shape(ap1); + ap2shape = _select_matrix_shape(ap2); + + if (ap1shape == _scalar || ap2shape == _scalar) { + PyArrayObject *oap1, *oap2; + oap1 = ap1; oap2 = ap2; + /* One of ap1 or ap2 is a scalar */ + if (ap1shape == _scalar) { + /* Make ap2 the scalar */ + PyArrayObject *t = ap1; + ap1 = ap2; + ap2 = t; + ap1shape = ap2shape; + ap2shape = _scalar; + } + + if (ap1shape == _row) { + ap1stride = PyArray_STRIDE(ap1, 1); + } + else if (PyArray_NDIM(ap1) > 0) { + ap1stride = PyArray_STRIDE(ap1, 0); + } + + if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { + npy_intp *thisdims; + if (PyArray_NDIM(ap1) == 0) { + nd = PyArray_NDIM(ap2); + thisdims = PyArray_DIMS(ap2); + } + else { + nd = PyArray_NDIM(ap1); + thisdims = PyArray_DIMS(ap1); + } + l = 1; + for (j = 0; j < nd; j++) { + dimensions[j] = thisdims[j]; + l *= dimensions[j]; + } + } + else { + l = PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1); + + if (PyArray_DIM(oap2, 0) != l) { + dot_alignment_error(oap1, PyArray_NDIM(oap1) - 1, oap2, 0); + goto fail; + } + nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; + /* + * nd = 0 or 1 or 2. If nd == 0 do nothing ... + */ + if (nd == 1) { + /* + * Either PyArray_NDIM(ap1) is 1 dim or PyArray_NDIM(ap2) is + * 1 dim and the other is 2 dim + */ + dimensions[0] = (PyArray_NDIM(oap1) == 2) ? + PyArray_DIM(oap1, 0) : PyArray_DIM(oap2, 1); + l = dimensions[0]; + /* + * Fix it so that dot(shape=(N,1), shape=(1,)) + * and dot(shape=(1,), shape=(1,N)) both return + * an (N,) array (but use the fast scalar code) + */ + } + else if (nd == 2) { + dimensions[0] = PyArray_DIM(oap1, 0); + dimensions[1] = PyArray_DIM(oap2, 1); + /* + * We need to make sure that dot(shape=(1,1), shape=(1,N)) + * and dot(shape=(N,1),shape=(1,1)) uses + * scalar multiplication appropriately + */ + if (ap1shape == _row) { + l = dimensions[1]; + } + else { + l = dimensions[0]; + } + } + + /* Check if the summation dimension is 0-sized */ + if (PyArray_DIM(oap1, PyArray_NDIM(oap1) - 1) == 0) { + l = 0; + } + } + } + else { + /* + * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2) + * Both ap1 and ap2 are vectors or matrices + */ + l = PyArray_DIM(ap1, PyArray_NDIM(ap1) - 1); + + if (PyArray_DIM(ap2, 0) != l) { + dot_alignment_error(ap1, PyArray_NDIM(ap1) - 1, ap2, 0); + goto fail; + } + nd = PyArray_NDIM(ap1) + PyArray_NDIM(ap2) - 2; + + if (nd == 1) { + dimensions[0] = (PyArray_NDIM(ap1) == 2) ? 
+ PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 1); + } + else if (nd == 2) { + dimensions[0] = PyArray_DIM(ap1, 0); + dimensions[1] = PyArray_DIM(ap2, 1); + } + } + + out_buf = new_array_for_sum(ap1, ap2, out, nd, dimensions, typenum, &result); + if (out_buf == NULL) { + goto fail; + } + + numbytes = PyArray_NBYTES(out_buf); + memset(PyArray_DATA(out_buf), 0, numbytes); + if (numbytes == 0 || l == 0) { + Py_DECREF(ap1); + Py_DECREF(ap2); + Py_DECREF(out_buf); + return PyArray_Return(result); + } + + if (ap2shape == _scalar) { + /* + * Multiplication by a scalar -- Level 1 BLAS + * if ap1shape is a matrix and we are not contiguous, then we can't + * just blast through the entire array using a single striding factor + */ + NPY_BEGIN_ALLOW_THREADS; + + if (typenum == NPY_DOUBLE) { + if (l == 1) { + *((double *)PyArray_DATA(out_buf)) = *((double *)PyArray_DATA(ap2)) * + *((double *)PyArray_DATA(ap1)); + } + else if (ap1shape != _matrix) { + cblas_daxpy(l, + *((double *)PyArray_DATA(ap2)), + (double *)PyArray_DATA(ap1), + ap1stride/sizeof(double), + (double *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + double val; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + val = *((double *)PyArray_DATA(ap2)); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(double); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(double); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_daxpy(l, val, (double *)ptr, a1s, + (double *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + else if (typenum == NPY_CDOUBLE) { + if (l == 1) { + npy_cdouble *ptr1, *ptr2, *res; + + ptr1 = (npy_cdouble *)PyArray_DATA(ap2); + ptr2 = (npy_cdouble *)PyArray_DATA(ap1); + res = (npy_cdouble *)PyArray_DATA(out_buf); + res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; + res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; + } + else if (ap1shape != _matrix) { + cblas_zaxpy(l, + (double *)PyArray_DATA(ap2), + (double *)PyArray_DATA(ap1), + ap1stride/sizeof(npy_cdouble), + (double *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + double *pval; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + pval = (double *)PyArray_DATA(ap2); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cdouble); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cdouble); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_zaxpy(l, pval, (double *)ptr, a1s, + (double *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + else if (typenum == NPY_FLOAT) { + if (l == 1) { + *((float *)PyArray_DATA(out_buf)) = *((float *)PyArray_DATA(ap2)) * + *((float *)PyArray_DATA(ap1)); + } + else if (ap1shape != _matrix) { + cblas_saxpy(l, + *((float *)PyArray_DATA(ap2)), + (float *)PyArray_DATA(ap1), + ap1stride/sizeof(float), + (float *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + float val; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 
0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + val = *((float *)PyArray_DATA(ap2)); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(float); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(float); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_saxpy(l, val, (float *)ptr, a1s, + (float *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + else if (typenum == NPY_CFLOAT) { + if (l == 1) { + npy_cfloat *ptr1, *ptr2, *res; + + ptr1 = (npy_cfloat *)PyArray_DATA(ap2); + ptr2 = (npy_cfloat *)PyArray_DATA(ap1); + res = (npy_cfloat *)PyArray_DATA(out_buf); + res->real = ptr1->real * ptr2->real - ptr1->imag * ptr2->imag; + res->imag = ptr1->real * ptr2->imag + ptr1->imag * ptr2->real; + } + else if (ap1shape != _matrix) { + cblas_caxpy(l, + (float *)PyArray_DATA(ap2), + (float *)PyArray_DATA(ap1), + ap1stride/sizeof(npy_cfloat), + (float *)PyArray_DATA(out_buf), 1); + } + else { + int maxind, oind, i, a1s, outs; + char *ptr, *optr; + float *pval; + + maxind = (PyArray_DIM(ap1, 0) >= PyArray_DIM(ap1, 1) ? 0 : 1); + oind = 1 - maxind; + ptr = PyArray_DATA(ap1); + optr = PyArray_DATA(out_buf); + l = PyArray_DIM(ap1, maxind); + pval = (float *)PyArray_DATA(ap2); + a1s = PyArray_STRIDE(ap1, maxind) / sizeof(npy_cfloat); + outs = PyArray_STRIDE(out_buf, maxind) / sizeof(npy_cfloat); + for (i = 0; i < PyArray_DIM(ap1, oind); i++) { + cblas_caxpy(l, pval, (float *)ptr, a1s, + (float *)optr, outs); + ptr += PyArray_STRIDE(ap1, oind); + optr += PyArray_STRIDE(out_buf, oind); + } + } + } + NPY_END_ALLOW_THREADS; + } + else if ((ap2shape == _column) && (ap1shape != _matrix)) { + NPY_BEGIN_ALLOW_THREADS; + + /* Dot product between two vectors -- Level 1 BLAS */ + PyArray_DESCR(out_buf)->f->dotfunc( + PyArray_DATA(ap1), PyArray_STRIDE(ap1, (ap1shape == _row)), + PyArray_DATA(ap2), PyArray_STRIDE(ap2, 0), + PyArray_DATA(out_buf), l, NULL); + NPY_END_ALLOW_THREADS; + } + else if (ap1shape == _matrix && ap2shape != _matrix) { + /* Matrix vector multiplication -- Level 2 BLAS */ + /* lda must be MAX(M,1) */ + enum CBLAS_ORDER Order; + int ap2s; + + if (!PyArray_ISONESEGMENT(ap1)) { + PyObject *new; + new = PyArray_Copy(ap1); + Py_DECREF(ap1); + ap1 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + NPY_BEGIN_ALLOW_THREADS + if (PyArray_ISCONTIGUOUS(ap1)) { + Order = CblasRowMajor; + lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1); + } + else { + Order = CblasColMajor; + lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1); + } + ap2s = PyArray_STRIDE(ap2, 0) / PyArray_ITEMSIZE(ap2); + gemv(typenum, Order, CblasNoTrans, ap1, lda, ap2, ap2s, out_buf); + NPY_END_ALLOW_THREADS; + } + else if (ap1shape != _matrix && ap2shape == _matrix) { + /* Vector matrix multiplication -- Level 2 BLAS */ + enum CBLAS_ORDER Order; + int ap1s; + + if (!PyArray_ISONESEGMENT(ap2)) { + PyObject *new; + new = PyArray_Copy(ap2); + Py_DECREF(ap2); + ap2 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + NPY_BEGIN_ALLOW_THREADS + if (PyArray_ISCONTIGUOUS(ap2)) { + Order = CblasRowMajor; + lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); + } + else { + Order = CblasColMajor; + lda = (PyArray_DIM(ap2, 0) > 1 ? 
PyArray_DIM(ap2, 0) : 1); + } + if (ap1shape == _row) { + ap1s = PyArray_STRIDE(ap1, 1) / PyArray_ITEMSIZE(ap1); + } + else { + ap1s = PyArray_STRIDE(ap1, 0) / PyArray_ITEMSIZE(ap1); + } + gemv(typenum, Order, CblasTrans, ap2, lda, ap1, ap1s, out_buf); + NPY_END_ALLOW_THREADS; + } + else { + /* + * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2) + * Matrix matrix multiplication -- Level 3 BLAS + * L x M multiplied by M x N + */ + enum CBLAS_ORDER Order; + enum CBLAS_TRANSPOSE Trans1, Trans2; + int M, N, L; + + /* Optimization possible: */ + /* + * We may be able to handle single-segment arrays here + * using appropriate values of Order, Trans1, and Trans2. + */ + if (!PyArray_IS_C_CONTIGUOUS(ap2) && !PyArray_IS_F_CONTIGUOUS(ap2)) { + PyObject *new = PyArray_Copy(ap2); + + Py_DECREF(ap2); + ap2 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + if (!PyArray_IS_C_CONTIGUOUS(ap1) && !PyArray_IS_F_CONTIGUOUS(ap1)) { + PyObject *new = PyArray_Copy(ap1); + + Py_DECREF(ap1); + ap1 = (PyArrayObject *)new; + if (new == NULL) { + goto fail; + } + } + + NPY_BEGIN_ALLOW_THREADS; + + Order = CblasRowMajor; + Trans1 = CblasNoTrans; + Trans2 = CblasNoTrans; + L = PyArray_DIM(ap1, 0); + N = PyArray_DIM(ap2, 1); + M = PyArray_DIM(ap2, 0); + lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1); + ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); + + /* + * Avoid temporary copies for arrays in Fortran order + */ + if (PyArray_IS_F_CONTIGUOUS(ap1)) { + Trans1 = CblasTrans; + lda = (PyArray_DIM(ap1, 0) > 1 ? PyArray_DIM(ap1, 0) : 1); + } + if (PyArray_IS_F_CONTIGUOUS(ap2)) { + Trans2 = CblasTrans; + ldb = (PyArray_DIM(ap2, 0) > 1 ? PyArray_DIM(ap2, 0) : 1); + } + + /* + * Use syrk if we have a case of a matrix times its transpose. + * Otherwise, use gemm for all other cases. + */ + if ( + (PyArray_BYTES(ap1) == PyArray_BYTES(ap2)) && + (PyArray_DIM(ap1, 0) == PyArray_DIM(ap2, 1)) && + (PyArray_DIM(ap1, 1) == PyArray_DIM(ap2, 0)) && + (PyArray_STRIDE(ap1, 0) == PyArray_STRIDE(ap2, 1)) && + (PyArray_STRIDE(ap1, 1) == PyArray_STRIDE(ap2, 0)) && + ((Trans1 == CblasTrans) ^ (Trans2 == CblasTrans)) && + ((Trans1 == CblasNoTrans) ^ (Trans2 == CblasNoTrans)) + ) { + if (Trans1 == CblasNoTrans) { + syrk(typenum, Order, Trans1, N, M, ap1, lda, out_buf); + } + else { + syrk(typenum, Order, Trans1, N, M, ap2, ldb, out_buf); + } + } + else { + gemm(typenum, Order, Trans1, Trans2, L, N, M, ap1, lda, ap2, ldb, + out_buf); + } + NPY_END_ALLOW_THREADS; + } + + + Py_DECREF(ap1); + Py_DECREF(ap2); + + /* Trigger possible copyback into `result` */ + PyArray_ResolveWritebackIfCopy(out_buf); + Py_DECREF(out_buf); + + return PyArray_Return(result); + +fail: + Py_XDECREF(ap1); + Py_XDECREF(ap2); + Py_XDECREF(out_buf); + Py_XDECREF(result); + return NULL; +} diff --git a/numpy/core/src/common/cblasfuncs.h b/numpy/core/src/common/cblasfuncs.h new file mode 100644 index 000000000..66ce4ca5b --- /dev/null +++ b/numpy/core/src/common/cblasfuncs.h @@ -0,0 +1,7 @@ +#ifndef _NPY_CBLASFUNCS_H_ +#define _NPY_CBLASFUNCS_H_ + +NPY_NO_EXPORT PyObject * +cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *); + +#endif diff --git a/numpy/core/src/common/python_xerbla.c b/numpy/core/src/common/python_xerbla.c new file mode 100644 index 000000000..bdf0b9058 --- /dev/null +++ b/numpy/core/src/common/python_xerbla.c @@ -0,0 +1,51 @@ +#include "Python.h" + +/* + * From f2c.h, this should be safe unless fortran is set to use 64 + * bit integers. We don't seem to have any good way to detect that. 
+ */ +typedef int integer; + +/* + From the original manpage: + -------------------------- + XERBLA is an error handler for the LAPACK routines. + It is called by an LAPACK routine if an input parameter has an invalid value. + A message is printed and execution stops. + + Instead of printing a message and stopping the execution, a + ValueError is raised with the message. + + Parameters: + ----------- + srname: Subroutine name to use in error message, maximum six characters. + Spaces at the end are skipped. + info: Number of the invalid parameter. +*/ + +int xerbla_(char *srname, integer *info) +{ + static const char format[] = "On entry to %.*s" \ + " parameter number %d had an illegal value"; + char buf[sizeof(format) + 6 + 4]; /* 6 for name, 4 for param. num. */ + + int len = 0; /* length of subroutine name*/ +#ifdef WITH_THREAD + PyGILState_STATE save; +#endif + + while( len<6 && srname[len]!='\0' ) + len++; + while( len && srname[len-1]==' ' ) + len--; +#ifdef WITH_THREAD + save = PyGILState_Ensure(); +#endif + PyOS_snprintf(buf, sizeof(buf), format, len, srname, *info); + PyErr_SetString(PyExc_ValueError, buf); +#ifdef WITH_THREAD + PyGILState_Release(save); +#endif + + return 0; +} diff --git a/numpy/core/src/common/ucsnarrow.c b/numpy/core/src/common/ucsnarrow.c new file mode 100644 index 000000000..8e293e9f2 --- /dev/null +++ b/numpy/core/src/common/ucsnarrow.c @@ -0,0 +1,174 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#include <locale.h> +#include <stdio.h> + +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" +#include "numpy/npy_math.h" + +#include "npy_config.h" + +#include "npy_pycompat.h" +#include "ctors.h" + +/* + * Functions only needed on narrow builds of Python for converting back and + * forth between the NumPy Unicode data-type (always 4-bytes) and the + * Python Unicode scalar (2-bytes on a narrow build). + */ + +/* + * The ucs2 buffer must be large enough to hold 2*ucs4length characters + * due to the use of surrogate pairs. + * + * The return value is the number of ucs2 bytes used-up which + * is ucs4length + number of surrogate pairs found. + * + * Values above 0xffff are converted to surrogate pairs. + */ +NPY_NO_EXPORT int +PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length) +{ + int i; + int numucs2 = 0; + npy_ucs4 chr; + for (i = 0; i < ucs4length; i++) { + chr = *ucs4++; + if (chr > 0xffff) { + numucs2++; + chr -= 0x10000L; + *ucs2++ = 0xD800 + (Py_UNICODE) (chr >> 10); + *ucs2++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF); + } + else { + *ucs2++ = (Py_UNICODE) chr; + } + numucs2++; + } + return numucs2; +} + + +/* + * This converts a UCS2 buffer of the given length to UCS4 buffer. + * It converts up to ucs4len characters of UCS2 + * + * It returns the number of characters converted which can + * be less than ucs2len if there are surrogate pairs in ucs2. + * + * The return value is the actual size of the used part of the ucs4 buffer. 
+ */ +NPY_NO_EXPORT int +PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len) +{ + int i; + npy_ucs4 chr; + Py_UNICODE ch; + int numchars=0; + + for (i = 0; (i < ucs2len) && (numchars < ucs4len); i++) { + ch = *ucs2++; + if (ch >= 0xd800 && ch <= 0xdfff) { + /* surrogate pair */ + chr = ((npy_ucs4)(ch-0xd800)) << 10; + chr += *ucs2++ + 0x2400; /* -0xdc00 + 0x10000 */ + i++; + } + else { + chr = (npy_ucs4) ch; + } + *ucs4++ = chr; + numchars++; + } + return numchars; +} + +/* + * Returns a PyUnicodeObject initialized from a buffer containing + * UCS4 unicode. + * + * Parameters + * ---------- + * src: char * + * Pointer to buffer containing UCS4 unicode. + * size: Py_ssize_t + * Size of buffer in bytes. + * swap: int + * If true, the data will be swapped. + * align: int + * If true, the data will be aligned. + * + * Returns + * ------- + * new_reference: PyUnicodeObject + */ +NPY_NO_EXPORT PyUnicodeObject * +PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align) +{ + Py_ssize_t ucs4len = size / sizeof(npy_ucs4); + npy_ucs4 *buf = (npy_ucs4 *)src; + int alloc = 0; + PyUnicodeObject *ret; + + /* swap and align if needed */ + if (swap || align) { + buf = (npy_ucs4 *)malloc(size); + if (buf == NULL) { + PyErr_NoMemory(); + goto fail; + } + alloc = 1; + memcpy(buf, src, size); + if (swap) { + byte_swap_vector(buf, ucs4len, sizeof(npy_ucs4)); + } + } + + /* trim trailing zeros */ + while (ucs4len > 0 && buf[ucs4len - 1] == 0) { + ucs4len--; + } + + /* produce PyUnicode object */ +#ifdef Py_UNICODE_WIDE + { + ret = (PyUnicodeObject *)PyUnicode_FromUnicode((Py_UNICODE*)buf, + (Py_ssize_t) ucs4len); + if (ret == NULL) { + goto fail; + } + } +#else + { + Py_ssize_t tmpsiz = 2 * sizeof(Py_UNICODE) * ucs4len; + Py_ssize_t ucs2len; + Py_UNICODE *tmp; + + if ((tmp = (Py_UNICODE *)malloc(tmpsiz)) == NULL) { + PyErr_NoMemory(); + goto fail; + } + ucs2len = PyUCS2Buffer_FromUCS4(tmp, buf, ucs4len); + ret = (PyUnicodeObject *)PyUnicode_FromUnicode(tmp, (Py_ssize_t) ucs2len); + free(tmp); + if (ret == NULL) { + goto fail; + } + } +#endif + + if (alloc) { + free(buf); + } + return ret; + +fail: + if (alloc) { + free(buf); + } + return NULL; +} diff --git a/numpy/core/src/common/ucsnarrow.h b/numpy/core/src/common/ucsnarrow.h new file mode 100644 index 000000000..fe31a5e25 --- /dev/null +++ b/numpy/core/src/common/ucsnarrow.h @@ -0,0 +1,13 @@ +#ifndef _NPY_UCSNARROW_H_ +#define _NPY_UCSNARROW_H_ + +NPY_NO_EXPORT int +PyUCS2Buffer_FromUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs4length); + +NPY_NO_EXPORT int +PyUCS2Buffer_AsUCS4(Py_UNICODE *ucs2, npy_ucs4 *ucs4, int ucs2len, int ucs4len); + +NPY_NO_EXPORT PyUnicodeObject * +PyUnicode_FromUCS4(char *src, Py_ssize_t size, int swap, int align); + +#endif |
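The broadcast_strides helper in array_assign.c above encodes the broadcasting rule used to set up raw iteration: a size-1 source dimension repeats by getting stride 0, a matching dimension keeps its stride, and dimensions the source lacks entirely are prepended with stride 0. A minimal standalone sketch of the same rule, assuming plain long in place of npy_intp and a return code in place of NumPy's exception reporting:

#include <stdio.h>

/*
 * Illustrative re-statement of the broadcast_strides rule: walk the
 * dimensions from the end so that src_strides and out_strides may alias,
 * exactly as the real function's comment points out.
 * Returns 0 on success, -1 if the shapes cannot be broadcast.
 */
static int
broadcast_strides_sketch(int ndim, const long *shape,
                         int src_ndim, const long *src_shape,
                         const long *src_strides, long *out_strides)
{
    int idim, idim_start = ndim - src_ndim;

    if (idim_start < 0) {
        return -1;                  /* can't broadcast to fewer dimensions */
    }
    for (idim = ndim - 1; idim >= idim_start; --idim) {
        long src_dim = src_shape[idim - idim_start];
        if (src_dim == 1) {
            out_strides[idim] = 0;                   /* repeat the element */
        }
        else if (src_dim != shape[idim]) {
            return -1;                           /* incompatible dimension */
        }
        else {
            out_strides[idim] = src_strides[idim - idim_start];
        }
    }
    for (idim = 0; idim < idim_start; ++idim) {
        out_strides[idim] = 0;        /* prepended dimensions also repeat */
    }
    return 0;
}

int main(void)
{
    /* Broadcast a (3,1) array of doubles into a (2,3,4) iteration space. */
    long shape[3] = {2, 3, 4};
    long src_shape[2] = {3, 1};
    long src_strides[2] = {8, 8};       /* strides are in bytes */
    long out[3];

    if (broadcast_strides_sketch(3, shape, 2, src_shape,
                                 src_strides, out) == 0) {
        /* prints out_strides = {0, 8, 0} */
        printf("out_strides = {%ld, %ld, %ld}\n", out[0], out[1], out[2]);
    }
    return 0;
}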
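raw_array_is_aligned tests the base pointer and every stride in one shot by OR-ing them together before the alignment check: OR accumulates the low bits of all its inputs, and for a power-of-two alignment A a value is a multiple of A exactly when its bits below A are clear. A self-contained sketch of that invariant; the final test here inlines what the real code delegates to npy_is_aligned, and the power-of-two assumption is made explicit:

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch of the OR trick: if any of data or the strides has a low bit
 * set, the OR of them all has it set too, so a single mask test covers
 * every input. Valid only for power-of-two alignments.
 */
static int
all_aligned(uintptr_t data, const long *strides, int ndim, unsigned alignment)
{
    uintptr_t check = data;
    int i;

    for (i = 0; i < ndim; i++) {
        check |= (uintptr_t)strides[i];
    }
    return (check & (uintptr_t)(alignment - 1)) == 0;
}

int main(void)
{
    long ok_strides[2] = {32, 8};
    long bad_strides[2] = {32, 4};      /* 4 is not a multiple of 8 */

    printf("%d\n", all_aligned(0x1000, ok_strides, 2, 8));   /* 1 */
    printf("%d\n", all_aligned(0x1000, bad_strides, 2, 8));  /* 0 */
    return 0;
}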
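In the Level-2 branches of cblas_matrixproduct, a one-segment but Fortran-ordered matrix is never copied; the code instead describes it to BLAS as column-major, so only the order argument and the leading dimension lda change, never the data. lda is clamped to at least 1, matching the source comment's "lda must be MAX(M,1)". A small sketch handing the same logical 2x3 matrix to cblas_dgemv both ways (the values are illustrative; link against any CBLAS implementation):

#include <stdio.h>
#include <cblas.h>

int main(void)
{
    /* The same logical 2x3 matrix stored two ways. */
    double c_order[6] = {1, 2, 3,
                         4, 5, 6};          /* rows contiguous */
    double f_order[6] = {1, 4, 2, 5, 3, 6}; /* columns contiguous */
    double x[3] = {1, 1, 1};
    double y1[2] = {0, 0}, y2[2] = {0, 0};

    /* C-contiguous: row-major, lda = row length (3). */
    cblas_dgemv(CblasRowMajor, CblasNoTrans, 2, 3, 1.0,
                c_order, 3, x, 1, 0.0, y1, 1);
    /* Fortran-contiguous: column-major, lda = column length (2). */
    cblas_dgemv(CblasColMajor, CblasNoTrans, 2, 3, 1.0,
                f_order, 2, x, 1, 0.0, y2, 1);

    /* Both print the row sums: 6 15 / 6 15 */
    printf("%g %g / %g %g\n", y1[0], y1[1], y2[0], y2[1]);
    return 0;
}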
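The matrix-matrix branch special-cases dot(A, A.T): when both operands share a data pointer, their shapes and strides are mirror images of each other, and exactly one of them ends up flagged as transposed, the second operand is a transpose view of the first, the product is symmetric, and ?syrk can fill one triangle instead of running the full ?gemm (the syrk helper above then mirrors the upper triangle into the lower). A plain-C sketch of just that detection predicate, with a hypothetical mat2d struct standing in for the PyArrayObject fields the real check reads:

#include <stdio.h>

/* Hypothetical stand-in for the fields the real condition reads off
 * PyArrayObject; the struct and its names are assumptions. */
typedef struct {
    char *data;
    long  dims[2];
    long  strides[2];
    int   is_trans;     /* stands in for the Trans1/Trans2 flags */
} mat2d;

/*
 * Mirrors the condition in cblas_matrixproduct: same buffer, dims and
 * strides swapped between the operands, exactly one seen as transposed,
 * i.e. b is a transpose view of a, so dot(a, b) == dot(a, a.T).
 */
static int
is_matrix_times_its_transpose(const mat2d *a, const mat2d *b)
{
    return a->data == b->data
        && a->dims[0] == b->dims[1]
        && a->dims[1] == b->dims[0]
        && a->strides[0] == b->strides[1]
        && a->strides[1] == b->strides[0]
        && (a->is_trans ^ b->is_trans);
}

int main(void)
{
    char buf[3 * 4 * 8];                    /* 3x4 of doubles */
    mat2d a  = {buf, {3, 4}, {32, 8}, 0};   /* C-ordered view */
    mat2d at = {buf, {4, 3}, {8, 32}, 1};   /* its transpose view */

    printf("%d\n", is_matrix_times_its_transpose(&a, &at)); /* 1 */
    return 0;
}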
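PyUCS2Buffer_FromUCS4 and PyUCS2Buffer_AsUCS4 implement standard UTF-16 surrogate-pair coding for narrow Python builds: a code point above U+FFFF has 0x10000 subtracted, the top ten remaining bits land in a high surrogate based at 0xD800, the bottom ten in a low surrogate based at 0xDC00, and the decoder's + 0x2400 is the folded constant -0xdc00 + 0x10000 called out in the source comment. A round-trip check of the same arithmetic, assuming fixed-width uint16_t/uint32_t in place of Py_UNICODE and npy_ucs4:

#include <stdint.h>
#include <stdio.h>

/* Encode one code point > 0xFFFF into a surrogate pair, as
 * PyUCS2Buffer_FromUCS4 does. */
static void
to_surrogates(uint32_t chr, uint16_t *hi, uint16_t *lo)
{
    chr -= 0x10000;
    *hi = 0xD800 + (uint16_t)(chr >> 10);    /* top 10 bits */
    *lo = 0xDC00 + (uint16_t)(chr & 0x3FF);  /* bottom 10 bits */
}

/* Decode a pair back, using the same folded constant
 * (+0x2400 == -0xDC00 + 0x10000) as PyUCS2Buffer_AsUCS4. */
static uint32_t
from_surrogates(uint16_t hi, uint16_t lo)
{
    uint32_t chr = ((uint32_t)(hi - 0xD800)) << 10;
    return chr + lo + 0x2400;
}

int main(void)
{
    uint16_t hi, lo;

    to_surrogates(0x1F600, &hi, &lo);   /* U+1F600, outside the BMP */
    printf("hi=%04X lo=%04X\n", (unsigned)hi, (unsigned)lo); /* D83D DE00 */
    printf("back=%X\n", (unsigned)from_surrogates(hi, lo));  /* 1F600 */
    return 0;
}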