summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/linalg/setup.py6
-rw-r--r--numpy/linalg/umath_linalg.cpp (renamed from numpy/linalg/umath_linalg.c.src)2215
2 files changed, 1177 insertions, 1044 deletions
diff --git a/numpy/linalg/setup.py b/numpy/linalg/setup.py
index 94536bb2c..82c688d91 100644
--- a/numpy/linalg/setup.py
+++ b/numpy/linalg/setup.py
@@ -69,9 +69,13 @@ def configuration(parent_package='', top_path=None):
# umath_linalg module
config.add_extension(
'_umath_linalg',
- sources=['umath_linalg.c.src', get_lapack_lite_sources],
+ sources=['umath_linalg.cpp', get_lapack_lite_sources],
depends=['lapack_lite/f2c.h'],
extra_info=lapack_info,
+ extra_cxx_compile_args=['-std=c++11',
+ '-D__STDC_VERSION__=0',
+ '-fno-exceptions',
+ '-fno-rtti'],
libraries=['npymath'],
)
config.add_data_files('*.pyi')
diff --git a/numpy/linalg/umath_linalg.c.src b/numpy/linalg/umath_linalg.cpp
index f8a154445..944f46ba7 100644
--- a/numpy/linalg/umath_linalg.c.src
+++ b/numpy/linalg/umath_linalg.cpp
@@ -22,10 +22,16 @@
#include <stdio.h>
#include <assert.h>
#include <math.h>
+#include <utility>
static const char* umath_linalg_version_string = "0.1.5";
+struct scalar_trait {};
+struct complex_trait {};
+template<typename typ>
+using dispatch_scalar = typename std::conditional<std::is_scalar<typ>::value, scalar_trait, complex_trait>::type;
+
/*
****************************************************************************
* Debugging support *
@@ -78,28 +84,28 @@ typedef double fortran_doublereal;
typedef f2c_complex fortran_complex;
typedef f2c_doublecomplex fortran_doublecomplex;
-extern fortran_int
+extern "C" fortran_int
FNAME(sgeev)(char *jobvl, char *jobvr, fortran_int *n,
float a[], fortran_int *lda, float wr[], float wi[],
float vl[], fortran_int *ldvl, float vr[], fortran_int *ldvr,
float work[], fortran_int lwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dgeev)(char *jobvl, char *jobvr, fortran_int *n,
double a[], fortran_int *lda, double wr[], double wi[],
double vl[], fortran_int *ldvl, double vr[], fortran_int *ldvr,
double work[], fortran_int lwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cgeev)(char *jobvl, char *jobvr, fortran_int *n,
- f2c_doublecomplex a[], fortran_int *lda,
- f2c_doublecomplex w[],
- f2c_doublecomplex vl[], fortran_int *ldvl,
- f2c_doublecomplex vr[], fortran_int *ldvr,
- f2c_doublecomplex work[], fortran_int *lwork,
- double rwork[],
+ f2c_complex a[], fortran_int *lda,
+ f2c_complex w[],
+ f2c_complex vl[], fortran_int *ldvl,
+ f2c_complex vr[], fortran_int *ldvr,
+ f2c_complex work[], fortran_int *lwork,
+ float rwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zgeev)(char *jobvl, char *jobvr, fortran_int *n,
f2c_doublecomplex a[], fortran_int *lda,
f2c_doublecomplex w[],
@@ -109,24 +115,24 @@ FNAME(zgeev)(char *jobvl, char *jobvr, fortran_int *n,
double rwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(ssyevd)(char *jobz, char *uplo, fortran_int *n,
float a[], fortran_int *lda, float w[], float work[],
fortran_int *lwork, fortran_int iwork[], fortran_int *liwork,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dsyevd)(char *jobz, char *uplo, fortran_int *n,
double a[], fortran_int *lda, double w[], double work[],
fortran_int *lwork, fortran_int iwork[], fortran_int *liwork,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cheevd)(char *jobz, char *uplo, fortran_int *n,
f2c_complex a[], fortran_int *lda,
float w[], f2c_complex work[],
fortran_int *lwork, float rwork[], fortran_int *lrwork, fortran_int iwork[],
fortran_int *liwork,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zheevd)(char *jobz, char *uplo, fortran_int *n,
f2c_doublecomplex a[], fortran_int *lda,
double w[], f2c_doublecomplex work[],
@@ -134,19 +140,19 @@ FNAME(zheevd)(char *jobz, char *uplo, fortran_int *n,
fortran_int *liwork,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(sgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs,
float a[], fortran_int *lda, float b[], fortran_int *ldb,
float s[], float *rcond, fortran_int *rank,
float work[], fortran_int *lwork, fortran_int iwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs,
double a[], fortran_int *lda, double b[], fortran_int *ldb,
double s[], double *rcond, fortran_int *rank,
double work[], fortran_int *lwork, fortran_int iwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs,
f2c_complex a[], fortran_int *lda,
f2c_complex b[], fortran_int *ldb,
@@ -154,7 +160,7 @@ FNAME(cgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs,
f2c_complex work[], fortran_int *lwork,
float rwork[], fortran_int iwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs,
f2c_doublecomplex a[], fortran_int *lda,
f2c_doublecomplex b[], fortran_int *ldb,
@@ -163,105 +169,105 @@ FNAME(zgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs,
double rwork[], fortran_int iwork[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dgeqrf)(fortran_int *m, fortran_int *n, double a[], fortran_int *lda,
double tau[], double work[],
fortran_int *lwork, fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zgeqrf)(fortran_int *m, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda,
f2c_doublecomplex tau[], f2c_doublecomplex work[],
fortran_int *lwork, fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dorgqr)(fortran_int *m, fortran_int *n, fortran_int *k, double a[], fortran_int *lda,
double tau[], double work[],
fortran_int *lwork, fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zungqr)(fortran_int *m, fortran_int *n, fortran_int *k, f2c_doublecomplex a[],
fortran_int *lda, f2c_doublecomplex tau[],
f2c_doublecomplex work[], fortran_int *lwork, fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(sgesv)(fortran_int *n, fortran_int *nrhs,
float a[], fortran_int *lda,
fortran_int ipiv[],
float b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dgesv)(fortran_int *n, fortran_int *nrhs,
double a[], fortran_int *lda,
fortran_int ipiv[],
double b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cgesv)(fortran_int *n, fortran_int *nrhs,
f2c_complex a[], fortran_int *lda,
fortran_int ipiv[],
f2c_complex b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zgesv)(fortran_int *n, fortran_int *nrhs,
f2c_doublecomplex a[], fortran_int *lda,
fortran_int ipiv[],
f2c_doublecomplex b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(sgetrf)(fortran_int *m, fortran_int *n,
float a[], fortran_int *lda,
fortran_int ipiv[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dgetrf)(fortran_int *m, fortran_int *n,
double a[], fortran_int *lda,
fortran_int ipiv[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cgetrf)(fortran_int *m, fortran_int *n,
f2c_complex a[], fortran_int *lda,
fortran_int ipiv[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zgetrf)(fortran_int *m, fortran_int *n,
f2c_doublecomplex a[], fortran_int *lda,
fortran_int ipiv[],
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(spotrf)(char *uplo, fortran_int *n,
float a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dpotrf)(char *uplo, fortran_int *n,
double a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cpotrf)(char *uplo, fortran_int *n,
f2c_complex a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zpotrf)(char *uplo, fortran_int *n,
f2c_doublecomplex a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(sgesdd)(char *jobz, fortran_int *m, fortran_int *n,
float a[], fortran_int *lda, float s[], float u[],
fortran_int *ldu, float vt[], fortran_int *ldvt, float work[],
fortran_int *lwork, fortran_int iwork[], fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dgesdd)(char *jobz, fortran_int *m, fortran_int *n,
double a[], fortran_int *lda, double s[], double u[],
fortran_int *ldu, double vt[], fortran_int *ldvt, double work[],
fortran_int *lwork, fortran_int iwork[], fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cgesdd)(char *jobz, fortran_int *m, fortran_int *n,
f2c_complex a[], fortran_int *lda,
float s[], f2c_complex u[], fortran_int *ldu,
f2c_complex vt[], fortran_int *ldvt,
f2c_complex work[], fortran_int *lwork,
float rwork[], fortran_int iwork[], fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zgesdd)(char *jobz, fortran_int *m, fortran_int *n,
f2c_doublecomplex a[], fortran_int *lda,
double s[], f2c_doublecomplex u[], fortran_int *ldu,
@@ -269,87 +275,87 @@ FNAME(zgesdd)(char *jobz, fortran_int *m, fortran_int *n,
f2c_doublecomplex work[], fortran_int *lwork,
double rwork[], fortran_int iwork[], fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(spotrs)(char *uplo, fortran_int *n, fortran_int *nrhs,
float a[], fortran_int *lda,
float b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dpotrs)(char *uplo, fortran_int *n, fortran_int *nrhs,
double a[], fortran_int *lda,
double b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cpotrs)(char *uplo, fortran_int *n, fortran_int *nrhs,
f2c_complex a[], fortran_int *lda,
f2c_complex b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zpotrs)(char *uplo, fortran_int *n, fortran_int *nrhs,
f2c_doublecomplex a[], fortran_int *lda,
f2c_doublecomplex b[], fortran_int *ldb,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(spotri)(char *uplo, fortran_int *n,
float a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(dpotri)(char *uplo, fortran_int *n,
double a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(cpotri)(char *uplo, fortran_int *n,
f2c_complex a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(zpotri)(char *uplo, fortran_int *n,
f2c_doublecomplex a[], fortran_int *lda,
fortran_int *info);
-extern fortran_int
+extern "C" fortran_int
FNAME(scopy)(fortran_int *n,
float *sx, fortran_int *incx,
float *sy, fortran_int *incy);
-extern fortran_int
+extern "C" fortran_int
FNAME(dcopy)(fortran_int *n,
double *sx, fortran_int *incx,
double *sy, fortran_int *incy);
-extern fortran_int
+extern "C" fortran_int
FNAME(ccopy)(fortran_int *n,
f2c_complex *sx, fortran_int *incx,
f2c_complex *sy, fortran_int *incy);
-extern fortran_int
+extern "C" fortran_int
FNAME(zcopy)(fortran_int *n,
f2c_doublecomplex *sx, fortran_int *incx,
f2c_doublecomplex *sy, fortran_int *incy);
-extern float
+extern "C" float
FNAME(sdot)(fortran_int *n,
float *sx, fortran_int *incx,
float *sy, fortran_int *incy);
-extern double
+extern "C" double
FNAME(ddot)(fortran_int *n,
double *sx, fortran_int *incx,
double *sy, fortran_int *incy);
-extern void
+extern "C" void
FNAME(cdotu)(f2c_complex *ret, fortran_int *n,
f2c_complex *sx, fortran_int *incx,
f2c_complex *sy, fortran_int *incy);
-extern void
+extern "C" void
FNAME(zdotu)(f2c_doublecomplex *ret, fortran_int *n,
f2c_doublecomplex *sx, fortran_int *incx,
f2c_doublecomplex *sy, fortran_int *incy);
-extern void
+extern "C" void
FNAME(cdotc)(f2c_complex *ret, fortran_int *n,
f2c_complex *sx, fortran_int *incx,
f2c_complex *sy, fortran_int *incy);
-extern void
+extern "C" void
FNAME(zdotc)(f2c_doublecomplex *ret, fortran_int *n,
f2c_doublecomplex *sx, fortran_int *incx,
f2c_doublecomplex *sy, fortran_int *incy);
-extern fortran_int
+extern "C" fortran_int
FNAME(sgemm)(char *transa, char *transb,
fortran_int *m, fortran_int *n, fortran_int *k,
float *alpha,
@@ -357,7 +363,7 @@ FNAME(sgemm)(char *transa, char *transb,
float *b, fortran_int *ldb,
float *beta,
float *c, fortran_int *ldc);
-extern fortran_int
+extern "C" fortran_int
FNAME(dgemm)(char *transa, char *transb,
fortran_int *m, fortran_int *n, fortran_int *k,
double *alpha,
@@ -365,7 +371,7 @@ FNAME(dgemm)(char *transa, char *transb,
double *b, fortran_int *ldb,
double *beta,
double *c, fortran_int *ldc);
-extern fortran_int
+extern "C" fortran_int
FNAME(cgemm)(char *transa, char *transb,
fortran_int *m, fortran_int *n, fortran_int *k,
f2c_complex *alpha,
@@ -373,7 +379,7 @@ FNAME(cgemm)(char *transa, char *transb,
f2c_complex *b, fortran_int *ldb,
f2c_complex *beta,
f2c_complex *c, fortran_int *ldc);
-extern fortran_int
+extern "C" fortran_int
FNAME(zgemm)(char *transa, char *transb,
fortran_int *m, fortran_int *n, fortran_int *k,
f2c_doublecomplex *alpha,
@@ -427,80 +433,92 @@ set_fp_invalid_or_clear(int error_occurred)
#define UMATH_LINALG_MODULE_NAME "_umath_linalg"
-typedef union {
- fortran_complex f;
- npy_cfloat npy;
- float array[2];
-} COMPLEX_t;
-
-typedef union {
- fortran_doublecomplex f;
- npy_cdouble npy;
- double array[2];
-} DOUBLECOMPLEX_t;
-
-static float s_one;
-static float s_zero;
-static float s_minus_one;
-static float s_ninf;
-static float s_nan;
-static double d_one;
-static double d_zero;
-static double d_minus_one;
-static double d_ninf;
-static double d_nan;
-static COMPLEX_t c_one;
-static COMPLEX_t c_zero;
-static COMPLEX_t c_minus_one;
-static COMPLEX_t c_ninf;
-static COMPLEX_t c_nan;
-static DOUBLECOMPLEX_t z_one;
-static DOUBLECOMPLEX_t z_zero;
-static DOUBLECOMPLEX_t z_minus_one;
-static DOUBLECOMPLEX_t z_ninf;
-static DOUBLECOMPLEX_t z_nan;
-
-static void init_constants(void)
-{
- /*
- this is needed as NPY_INFINITY and NPY_NAN macros
- can't be used as initializers. I prefer to just set
- all the constants the same way.
- */
- s_one = 1.0f;
- s_zero = 0.0f;
- s_minus_one = -1.0f;
- s_ninf = -NPY_INFINITYF;
- s_nan = NPY_NANF;
-
- d_one = 1.0;
- d_zero = 0.0;
- d_minus_one = -1.0;
- d_ninf = -NPY_INFINITY;
- d_nan = NPY_NAN;
-
- c_one.array[0] = 1.0f;
- c_one.array[1] = 0.0f;
- c_zero.array[0] = 0.0f;
- c_zero.array[1] = 0.0f;
- c_minus_one.array[0] = -1.0f;
- c_minus_one.array[1] = 0.0f;
- c_ninf.array[0] = -NPY_INFINITYF;
- c_ninf.array[1] = 0.0f;
- c_nan.array[0] = NPY_NANF;
- c_nan.array[1] = NPY_NANF;
-
- z_one.array[0] = 1.0;
- z_one.array[1] = 0.0;
- z_zero.array[0] = 0.0;
- z_zero.array[1] = 0.0;
- z_minus_one.array[0] = -1.0;
- z_minus_one.array[1] = 0.0;
- z_ninf.array[0] = -NPY_INFINITY;
- z_ninf.array[1] = 0.0;
- z_nan.array[0] = NPY_NAN;
- z_nan.array[1] = NPY_NAN;
-}
+template<typename T>
+struct numeric_limits;
+
+template<>
+struct numeric_limits<float> {
+static constexpr float one = 1.0f;
+static constexpr float zero = 0.0f;
+static constexpr float minus_one = -1.0f;
+static const float ninf;
+static const float nan;
+};
+constexpr float numeric_limits<float>::one;
+constexpr float numeric_limits<float>::zero;
+constexpr float numeric_limits<float>::minus_one;
+const float numeric_limits<float>::ninf = -NPY_INFINITYF;
+const float numeric_limits<float>::nan = NPY_NANF;
+
+template<>
+struct numeric_limits<double> {
+static constexpr double one = 1.0;
+static constexpr double zero = 0.0;
+static constexpr double minus_one = -1.0;
+static const double ninf;
+static const double nan;
+};
+constexpr double numeric_limits<double>::one;
+constexpr double numeric_limits<double>::zero;
+constexpr double numeric_limits<double>::minus_one;
+const double numeric_limits<double>::ninf = -NPY_INFINITY;
+const double numeric_limits<double>::nan = NPY_NAN;
+
+template<>
+struct numeric_limits<npy_cfloat> {
+static constexpr npy_cfloat one = {1.0f, 0.0f};
+static constexpr npy_cfloat zero = {0.0f, 0.0f};
+static constexpr npy_cfloat minus_one = {-1.0f, 0.0f};
+static const npy_cfloat ninf;
+static const npy_cfloat nan;
+};
+constexpr npy_cfloat numeric_limits<npy_cfloat>::one;
+constexpr npy_cfloat numeric_limits<npy_cfloat>::zero;
+constexpr npy_cfloat numeric_limits<npy_cfloat>::minus_one;
+const npy_cfloat numeric_limits<npy_cfloat>::ninf = {-NPY_INFINITYF, 0.0f};
+const npy_cfloat numeric_limits<npy_cfloat>::nan = {NPY_NANF, NPY_NANF};
+
+template<>
+struct numeric_limits<f2c_complex> {
+static constexpr f2c_complex one = {1.0f, 0.0f};
+static constexpr f2c_complex zero = {0.0f, 0.0f};
+static constexpr f2c_complex minus_one = {-1.0f, 0.0f};
+static const f2c_complex ninf;
+static const f2c_complex nan;
+};
+constexpr f2c_complex numeric_limits<f2c_complex>::one;
+constexpr f2c_complex numeric_limits<f2c_complex>::zero;
+constexpr f2c_complex numeric_limits<f2c_complex>::minus_one;
+const f2c_complex numeric_limits<f2c_complex>::ninf = {-NPY_INFINITYF, 0.0f};
+const f2c_complex numeric_limits<f2c_complex>::nan = {NPY_NANF, NPY_NANF};
+
+template<>
+struct numeric_limits<npy_cdouble> {
+static constexpr npy_cdouble one = {1.0, 0.0};
+static constexpr npy_cdouble zero = {0.0, 0.0};
+static constexpr npy_cdouble minus_one = {-1.0, 0.0};
+static const npy_cdouble ninf;
+static const npy_cdouble nan;
+};
+constexpr npy_cdouble numeric_limits<npy_cdouble>::one;
+constexpr npy_cdouble numeric_limits<npy_cdouble>::zero;
+constexpr npy_cdouble numeric_limits<npy_cdouble>::minus_one;
+const npy_cdouble numeric_limits<npy_cdouble>::ninf = {-NPY_INFINITY, 0.0};
+const npy_cdouble numeric_limits<npy_cdouble>::nan = {NPY_NAN, NPY_NAN};
+
+template<>
+struct numeric_limits<f2c_doublecomplex> {
+static constexpr f2c_doublecomplex one = {1.0, 0.0};
+static constexpr f2c_doublecomplex zero = {0.0, 0.0};
+static constexpr f2c_doublecomplex minus_one = {-1.0, 0.0};
+static const f2c_doublecomplex ninf;
+static const f2c_doublecomplex nan;
+};
+constexpr f2c_doublecomplex numeric_limits<f2c_doublecomplex>::one;
+constexpr f2c_doublecomplex numeric_limits<f2c_doublecomplex>::zero;
+constexpr f2c_doublecomplex numeric_limits<f2c_doublecomplex>::minus_one;
+const f2c_doublecomplex numeric_limits<f2c_doublecomplex>::ninf = {-NPY_INFINITY, 0.0};
+const f2c_doublecomplex numeric_limits<f2c_doublecomplex>::nan = {NPY_NAN, NPY_NAN};
/*
*****************************************************************************
@@ -590,36 +608,33 @@ dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params)
}
static NPY_INLINE void
-print_FLOAT(npy_float s)
+print(npy_float s)
{
TRACE_TXT(" %8.4f", s);
}
static NPY_INLINE void
-print_DOUBLE(npy_double d)
+print(npy_double d)
{
TRACE_TXT(" %10.6f", d);
}
static NPY_INLINE void
-print_CFLOAT(npy_cfloat c)
+print(npy_cfloat c)
{
float* c_parts = (float*)&c;
TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]);
}
static NPY_INLINE void
-print_CDOUBLE(npy_cdouble z)
+print(npy_cdouble z)
{
double* z_parts = (double*)&z;
TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]);
}
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
- */
+template<typename typ>
static NPY_INLINE void
-dump_@TYPE@_matrix(const char* name,
+dump_matrix(const char* name,
size_t rows, size_t columns,
- const @typ@* ptr)
+ const typ* ptr)
{
size_t i, j;
@@ -629,13 +644,12 @@ dump_@TYPE@_matrix(const char* name,
TRACE_TXT("| ");
for (j = 0; j < columns; j++)
{
- print_@TYPE@(ptr[j*rows + i]);
+ print(ptr[j*rows + i]);
TRACE_TXT(", ");
}
TRACE_TXT(" |\n");
}
}
-/**end repeat**/
/*
@@ -754,45 +768,100 @@ update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count)
/*
*****************************************************************************
+ ** DISPATCHER FUNCS **
+ *****************************************************************************
+ */
+static fortran_int copy(fortran_int *n,
+ float *sx, fortran_int *incx,
+ float *sy, fortran_int *incy) { return FNAME(scopy)(n, sx, incx,
+ sy, incy);
+}
+static fortran_int copy(fortran_int *n,
+ double *sx, fortran_int *incx,
+ double *sy, fortran_int *incy) { return FNAME(dcopy)(n, sx, incx,
+ sy, incy);
+}
+static fortran_int copy(fortran_int *n,
+ f2c_complex *sx, fortran_int *incx,
+ f2c_complex *sy, fortran_int *incy) { return FNAME(ccopy)(n, sx, incx,
+ sy, incy);
+}
+static fortran_int copy(fortran_int *n,
+ f2c_doublecomplex *sx, fortran_int *incx,
+ f2c_doublecomplex *sy, fortran_int *incy) { return FNAME(zcopy)(n, sx, incx,
+ sy, incy);
+}
+
+static fortran_int getrf(fortran_int *m, fortran_int *n, float a[], fortran_int
+*lda, fortran_int ipiv[], fortran_int *info) {
+ return LAPACK(sgetrf)(m, n, a, lda, ipiv, info);
+}
+static fortran_int getrf(fortran_int *m, fortran_int *n, double a[], fortran_int
+*lda, fortran_int ipiv[], fortran_int *info) {
+ return LAPACK(dgetrf)(m, n, a, lda, ipiv, info);
+}
+static fortran_int getrf(fortran_int *m, fortran_int *n, f2c_complex a[], fortran_int
+*lda, fortran_int ipiv[], fortran_int *info) {
+ return LAPACK(cgetrf)(m, n, a, lda, ipiv, info);
+}
+static fortran_int getrf(fortran_int *m, fortran_int *n, f2c_doublecomplex a[], fortran_int
+*lda, fortran_int ipiv[], fortran_int *info) {
+ return LAPACK(zgetrf)(m, n, a, lda, ipiv, info);
+}
+
+/*
+ *****************************************************************************
** HELPER FUNCS **
*****************************************************************************
*/
+template<typename T>
+struct fortran_type {
+using type = T;
+};
+
+template<> struct fortran_type<npy_cfloat> { using type = f2c_complex;};
+template<> struct fortran_type<npy_cdouble> { using type = f2c_doublecomplex;};
+template<typename T>
+using fortran_type_t = typename fortran_type<T>::type;
+
+template<typename T>
+struct basetype {
+using type = T;
+};
+template<> struct basetype<npy_cfloat> { using type = npy_float;};
+template<> struct basetype<npy_cdouble> { using type = npy_double;};
+template<> struct basetype<f2c_complex> { using type = fortran_real;};
+template<> struct basetype<f2c_doublecomplex> { using type = fortran_doublereal;};
+template<typename T>
+using basetype_t = typename basetype<T>::type;
/* rearranging of 2D matrices using blas */
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t#
- #copy = scopy, dcopy, ccopy, zcopy#
- #nan = s_nan, d_nan, c_nan, z_nan#
- #zero = s_zero, d_zero, c_zero, z_zero#
- */
+template<typename typ>
static NPY_INLINE void *
-linearize_@TYPE@_matrix(void *dst_in,
- void *src_in,
+linearize_matrix(typ *dst,
+ typ *src,
const LINEARIZE_DATA_t* data)
{
- @typ@ *src = (@typ@ *) src_in;
- @typ@ *dst = (@typ@ *) dst_in;
-
+ using ftyp = fortran_type_t<typ>;
if (dst) {
int i, j;
- @typ@* rv = dst;
+ typ* rv = dst;
fortran_int columns = (fortran_int)data->columns;
fortran_int column_strides =
- (fortran_int)(data->column_strides/sizeof(@typ@));
+ (fortran_int)(data->column_strides/sizeof(typ));
fortran_int one = 1;
for (i = 0; i < data->rows; i++) {
if (column_strides > 0) {
- FNAME(@copy@)(&columns,
- (void*)src, &column_strides,
- (void*)dst, &one);
+ copy(&columns,
+ (ftyp*)src, &column_strides,
+ (ftyp*)dst, &one);
}
else if (column_strides < 0) {
- FNAME(@copy@)(&columns,
- (void*)((@typ@*)src + (columns-1)*column_strides),
+ copy(&columns,
+ ((ftyp*)src + (columns-1)*column_strides),
&column_strides,
- (void*)dst, &one);
+ (ftyp*)dst, &one);
}
else {
/*
@@ -801,10 +870,10 @@ linearize_@TYPE@_matrix(void *dst_in,
* manually
*/
for (j = 0; j < columns; ++j) {
- memcpy((@typ@*)dst + j, (@typ@*)src, sizeof(@typ@));
+ memcpy(dst + j, src, sizeof(typ));
}
}
- src += data->row_strides/sizeof(@typ@);
+ src += data->row_strides/sizeof(typ);
dst += data->output_lead_dim;
}
return rv;
@@ -813,31 +882,31 @@ linearize_@TYPE@_matrix(void *dst_in,
}
}
+template<typename typ>
static NPY_INLINE void *
-delinearize_@TYPE@_matrix(void *dst_in,
- void *src_in,
+delinearize_matrix(typ *dst,
+ typ *src,
const LINEARIZE_DATA_t* data)
{
- @typ@ *src = (@typ@ *) src_in;
- @typ@ *dst = (@typ@ *) dst_in;
+using ftyp = fortran_type_t<typ>;
if (src) {
int i;
- @typ@ *rv = src;
+ typ *rv = src;
fortran_int columns = (fortran_int)data->columns;
fortran_int column_strides =
- (fortran_int)(data->column_strides/sizeof(@typ@));
+ (fortran_int)(data->column_strides/sizeof(typ));
fortran_int one = 1;
for (i = 0; i < data->rows; i++) {
if (column_strides > 0) {
- FNAME(@copy@)(&columns,
- (void*)src, &one,
- (void*)dst, &column_strides);
+ copy(&columns,
+ (ftyp*)src, &one,
+ (ftyp*)dst, &column_strides);
}
else if (column_strides < 0) {
- FNAME(@copy@)(&columns,
- (void*)src, &one,
- (void*)((@typ@*)dst + (columns-1)*column_strides),
+ copy(&columns,
+ (ftyp*)src, &one,
+ ((ftyp*)dst + (columns-1)*column_strides),
&column_strides);
}
else {
@@ -847,13 +916,13 @@ delinearize_@TYPE@_matrix(void *dst_in,
* manually
*/
if (columns > 0) {
- memcpy((@typ@*)dst,
- (@typ@*)src + (columns-1),
- sizeof(@typ@));
+ memcpy(dst,
+ src + (columns-1),
+ sizeof(typ));
}
}
src += data->output_lead_dim;
- dst += data->row_strides/sizeof(@typ@);
+ dst += data->row_strides/sizeof(typ);
}
return rv;
@@ -862,116 +931,97 @@ delinearize_@TYPE@_matrix(void *dst_in,
}
}
+template<typename typ>
static NPY_INLINE void
-nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data)
+nan_matrix(typ *dst, const LINEARIZE_DATA_t* data)
{
- @typ@ *dst = (@typ@ *) dst_in;
-
int i, j;
for (i = 0; i < data->rows; i++) {
- @typ@ *cp = dst;
- ptrdiff_t cs = data->column_strides/sizeof(@typ@);
+ typ *cp = dst;
+ ptrdiff_t cs = data->column_strides/sizeof(typ);
for (j = 0; j < data->columns; ++j) {
- *cp = @nan@;
+ *cp = numeric_limits<typ>::nan;
cp += cs;
}
- dst += data->row_strides/sizeof(@typ@);
+ dst += data->row_strides/sizeof(typ);
}
}
+template<typename typ>
static NPY_INLINE void
-zero_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data)
+zero_matrix(typ *dst, const LINEARIZE_DATA_t* data)
{
- @typ@ *dst = (@typ@ *) dst_in;
-
int i, j;
for (i = 0; i < data->rows; i++) {
- @typ@ *cp = dst;
- ptrdiff_t cs = data->column_strides/sizeof(@typ@);
+ typ *cp = dst;
+ ptrdiff_t cs = data->column_strides/sizeof(typ);
for (j = 0; j < data->columns; ++j) {
- *cp = @zero@;
+ *cp = numeric_limits<typ>::zero;
cp += cs;
}
- dst += data->row_strides/sizeof(@typ@);
+ dst += data->row_strides/sizeof(typ);
}
}
-/**end repeat**/
-
/* identity square matrix generation */
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t#
- #cblas_type = s, d, c, z#
- */
+template<typename typ>
static NPY_INLINE void
-identity_@TYPE@_matrix(void *ptr, size_t n)
+identity_matrix(typ *matrix, size_t n)
{
size_t i;
- @typ@ *matrix = (@typ@*) ptr;
/* in IEEE floating point, zeroes are represented as bitwise 0 */
- memset(matrix, 0, n*n*sizeof(@typ@));
+ memset(matrix, 0, n*n*sizeof(typ));
for (i = 0; i < n; ++i)
{
- *matrix = @cblas_type@_one;
+ *matrix = numeric_limits<typ>::one;
matrix += n+1;
}
}
-/**end repeat**/
/* lower/upper triangular matrix using blas (in place) */
-/**begin repeat
-
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t#
- #cblas_type = s, d, c, z#
- */
+template<typename typ>
static NPY_INLINE void
-triu_@TYPE@_matrix(void *ptr, size_t n)
+triu_matrix(typ *matrix, size_t n)
{
size_t i, j;
- @typ@ *matrix = (@typ@*)ptr;
matrix += n;
for (i = 1; i < n; ++i) {
for (j = 0; j < i; ++j) {
- matrix[j] = @cblas_type@_zero;
+ matrix[j] = numeric_limits<typ>::zero;
}
matrix += n;
}
}
-/**end repeat**/
/* -------------------------------------------------------------------------- */
/* Determinants */
-/**begin repeat
- #TYPE = FLOAT, DOUBLE#
- #typ = npy_float, npy_double#
- #log_func = npy_logf, npy_log#
- #exp_func = npy_expf, npy_exp#
- #zero = 0.0f, 0.0#
-*/
+static npy_float npylog(npy_float f) { return npy_logf(f);}
+static npy_double npylog(npy_double d) { return npy_log(d);}
+static npy_float npyexp(npy_float f) { return npy_expf(f);}
+static npy_double npyexp(npy_double d) { return npy_exp(d);}
+template<typename typ>
static NPY_INLINE void
-@TYPE@_slogdet_from_factored_diagonal(@typ@* src,
+slogdet_from_factored_diagonal(typ* src,
fortran_int m,
- @typ@ *sign,
- @typ@ *logdet)
+ typ *sign,
+ typ *logdet)
{
- @typ@ acc_sign = *sign;
- @typ@ acc_logdet = @zero@;
+ typ acc_sign = *sign;
+ typ acc_logdet = numeric_limits<typ>::zero;
int i;
for (i = 0; i < m; i++) {
- @typ@ abs_element = *src;
- if (abs_element < @zero@) {
+ typ abs_element = *src;
+ if (abs_element < numeric_limits<typ>::zero) {
acc_sign = -acc_sign;
abs_element = -abs_element;
}
- acc_logdet += @log_func@(abs_element);
+ acc_logdet += npylog(abs_element);
src += m+1;
}
@@ -979,32 +1029,26 @@ static NPY_INLINE void
*logdet = acc_logdet;
}
-static NPY_INLINE @typ@
-@TYPE@_det_from_slogdet(@typ@ sign, @typ@ logdet)
+template<typename typ>
+static NPY_INLINE typ
+det_from_slogdet(typ sign, typ logdet)
{
- @typ@ result = sign * @exp_func@(logdet);
+ typ result = sign * npyexp(logdet);
return result;
}
-/**end repeat**/
+npy_float npyabs(npy_cfloat z) { return npy_cabsf(z);}
+npy_double npyabs(npy_cdouble z) { return npy_cabs(z);}
-/**begin repeat
- #TYPE = CFLOAT, CDOUBLE#
- #typ = npy_cfloat, npy_cdouble#
- #basetyp = npy_float, npy_double#
- #abs_func = npy_cabsf, npy_cabs#
- #log_func = npy_logf, npy_log#
- #exp_func = npy_expf, npy_exp#
- #zero = 0.0f, 0.0#
-*/
-#define RE(COMPLEX) (((@basetyp@*)(&COMPLEX))[0])
-#define IM(COMPLEX) (((@basetyp@*)(&COMPLEX))[1])
+#define RE(COMPLEX) (COMPLEX).real
+#define IM(COMPLEX) (COMPLEX).imag
-static NPY_INLINE @typ@
-@TYPE@_mult(@typ@ op1, @typ@ op2)
+template<typename typ>
+static NPY_INLINE typ
+mult(typ op1, typ op2)
{
- @typ@ rv;
+ typ rv;
RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2);
IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2);
@@ -1013,25 +1057,26 @@ static NPY_INLINE @typ@
}
+template<typename typ, typename basetyp>
static NPY_INLINE void
-@TYPE@_slogdet_from_factored_diagonal(@typ@* src,
+slogdet_from_factored_diagonal(typ* src,
fortran_int m,
- @typ@ *sign,
- @basetyp@ *logdet)
+ typ *sign,
+ basetyp *logdet)
{
int i;
- @typ@ sign_acc = *sign;
- @basetyp@ logdet_acc = @zero@;
+ typ sign_acc = *sign;
+ basetyp logdet_acc = numeric_limits<basetyp>::zero;
for (i = 0; i < m; i++)
{
- @basetyp@ abs_element = @abs_func@(*src);
- @typ@ sign_element;
+ basetyp abs_element = npyabs(*src);
+ typ sign_element;
RE(sign_element) = RE(*src) / abs_element;
IM(sign_element) = IM(*src) / abs_element;
- sign_acc = @TYPE@_mult(sign_acc, sign_element);
- logdet_acc += @log_func@(abs_element);
+ sign_acc = mult(sign_acc, sign_element);
+ logdet_acc += npylog(abs_element);
src += m + 1;
}
@@ -1039,17 +1084,17 @@ static NPY_INLINE void
*logdet = logdet_acc;
}
-static NPY_INLINE @typ@
-@TYPE@_det_from_slogdet(@typ@ sign, @basetyp@ logdet)
+template<typename typ, typename basetyp>
+static NPY_INLINE typ
+det_from_slogdet(typ sign, basetyp logdet)
{
- @typ@ tmp;
- RE(tmp) = @exp_func@(logdet);
- IM(tmp) = @zero@;
- return @TYPE@_mult(sign, tmp);
+ typ tmp;
+ RE(tmp) = npyexp(logdet);
+ IM(tmp) = numeric_limits<basetyp>::zero;
+ return mult(sign, tmp);
}
#undef RE
#undef IM
-/**end repeat**/
/* As in the linalg package, the determinant is computed via LU factorization
@@ -1057,26 +1102,20 @@ static NPY_INLINE @typ@
* slogdet computes sign + log(determinant).
* det computes sign * exp(slogdet).
*/
-/**begin repeat
-
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
- #basetyp = npy_float, npy_double, npy_float, npy_double#
- #cblas_type = s, d, c, z#
-*/
-
+template<typename typ, typename basetyp>
static NPY_INLINE void
-@TYPE@_slogdet_single_element(fortran_int m,
- void* src,
+slogdet_single_element(fortran_int m,
+ typ* src,
fortran_int* pivots,
- @typ@ *sign,
- @basetyp@ *logdet)
+ typ *sign,
+ basetyp *logdet)
{
+using ftyp = fortran_type_t<typ>;
fortran_int info = 0;
fortran_int lda = fortran_int_max(m, 1);
int i;
/* note: done in place */
- LAPACK(@cblas_type@getrf)(&m, &m, (void *)src, &lda, pivots, &info);
+ getrf(&m, &m, (ftyp*)src, &lda, pivots, &info);
if (info == 0) {
int change_sign = 0;
@@ -1086,23 +1125,20 @@ static NPY_INLINE void
change_sign += (pivots[i] != (i+1));
}
- memcpy(sign,
- (change_sign % 2)?
- &@cblas_type@_minus_one :
- &@cblas_type@_one
- , sizeof(*sign));
- @TYPE@_slogdet_from_factored_diagonal(src, m, sign, logdet);
+ *sign = (change_sign % 2)?numeric_limits<typ>::minus_one:numeric_limits<typ>::one;
+ slogdet_from_factored_diagonal(src, m, sign, logdet);
} else {
/*
if getrf fails, use 0 as sign and -inf as logdet
*/
- memcpy(sign, &@cblas_type@_zero, sizeof(*sign));
- memcpy(logdet, &@cblas_type@_ninf, sizeof(*logdet));
+ *sign = numeric_limits<typ>::zero;
+ *logdet = numeric_limits<basetyp>::ninf;
}
}
+template<typename typ, typename basetyp>
static void
-@TYPE@_slogdet(char **args,
+slogdet(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
@@ -1123,7 +1159,7 @@ static void
INIT_OUTER_LOOP_3
m = (fortran_int) dimensions[0];
safe_m = m;
- matrix_size = safe_m * safe_m * sizeof(@typ@);
+ matrix_size = safe_m * safe_m * sizeof(typ);
pivot_size = safe_m * sizeof(fortran_int);
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size);
@@ -1132,20 +1168,21 @@ static void
/* swapped steps to get matrix in FORTRAN order */
init_linearize_data(&lin_data, m, m, steps[1], steps[0]);
BEGIN_OUTER_LOOP_3
- linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data);
- @TYPE@_slogdet_single_element(m,
- (void*)tmp_buff,
+ linearize_matrix((typ*)tmp_buff, (typ*)args[0], &lin_data);
+ slogdet_single_element(m,
+ (typ*)tmp_buff,
(fortran_int*)(tmp_buff+matrix_size),
- (@typ@*)args[1],
- (@basetyp@*)args[2]);
+ (typ*)args[1],
+ (basetyp*)args[2]);
END_OUTER_LOOP
free(tmp_buff);
}
}
+template<typename typ, typename basetyp>
static void
-@TYPE@_det(char **args,
+det(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
@@ -1166,42 +1203,42 @@ static void
INIT_OUTER_LOOP_2
m = (fortran_int) dimensions[0];
safe_m = m;
- matrix_size = safe_m * safe_m * sizeof(@typ@);
+ matrix_size = safe_m * safe_m * sizeof(typ);
pivot_size = safe_m * sizeof(fortran_int);
tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size);
if (tmp_buff) {
LINEARIZE_DATA_t lin_data;
- @typ@ sign;
- @basetyp@ logdet;
+ typ sign;
+ basetyp logdet;
/* swapped steps to get matrix in FORTRAN order */
init_linearize_data(&lin_data, m, m, steps[1], steps[0]);
BEGIN_OUTER_LOOP_2
- linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data);
- @TYPE@_slogdet_single_element(m,
- (void*)tmp_buff,
+ linearize_matrix((typ*)tmp_buff, (typ*)args[0], &lin_data);
+ slogdet_single_element(m,
+ (typ*)tmp_buff,
(fortran_int*)(tmp_buff + matrix_size),
&sign,
&logdet);
- *(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet);
+ *(typ *)args[1] = det_from_slogdet(sign, logdet);
END_OUTER_LOOP
free(tmp_buff);
}
}
-/**end repeat**/
/* -------------------------------------------------------------------------- */
/* Eigh family */
-typedef struct eigh_params_struct {
- void *A; /* matrix */
- void *W; /* eigenvalue vector */
- void *WORK; /* main work buffer */
- void *RWORK; /* secondary work buffer (for complex versions) */
- void *IWORK;
+template<typename typ>
+struct EIGH_PARAMS_t {
+ typ *A; /* matrix */
+ basetype_t<typ> *W; /* eigenvalue vector */
+ typ *WORK; /* main work buffer */
+ basetype_t<typ> *RWORK; /* secondary work buffer (for complex versions) */
+ fortran_int *IWORK;
fortran_int N;
fortran_int LWORK;
fortran_int LRWORK;
@@ -1209,20 +1246,24 @@ typedef struct eigh_params_struct {
char JOBZ;
char UPLO;
fortran_int LDA;
-} EIGH_PARAMS_t;
-
-/**begin repeat
- #TYPE = FLOAT, DOUBLE#
- #typ = npy_float, npy_double#
- #ftyp = fortran_real, fortran_doublereal#
- #lapack_func = ssyevd, dsyevd#
-*/
+} ;
static NPY_INLINE fortran_int
-call_@lapack_func@(EIGH_PARAMS_t *params)
+call_evd(EIGH_PARAMS_t<npy_float> *params)
+{
+ fortran_int rv;
+ LAPACK(ssyevd)(&params->JOBZ, &params->UPLO, &params->N,
+ params->A, &params->LDA, params->W,
+ params->WORK, &params->LWORK,
+ params->IWORK, &params->LIWORK,
+ &rv);
+ return rv;
+}
+static NPY_INLINE fortran_int
+call_evd(EIGH_PARAMS_t<npy_double> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->JOBZ, &params->UPLO, &params->N,
+ LAPACK(dsyevd)(&params->JOBZ, &params->UPLO, &params->N,
params->A, &params->LDA, params->W,
params->WORK, &params->LWORK,
params->IWORK, &params->LIWORK,
@@ -1230,13 +1271,15 @@ call_@lapack_func@(EIGH_PARAMS_t *params)
return rv;
}
+
/*
* Initialize the parameters to use in for the lapack function _syevd
* Handles buffer allocation
*/
+template<typename typ>
static NPY_INLINE int
-init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
- fortran_int N)
+init_evd(EIGH_PARAMS_t<typ>* params, char JOBZ, char UPLO,
+ fortran_int N, scalar_trait)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
@@ -1244,19 +1287,19 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
fortran_int liwork;
npy_uint8 *a, *w, *work, *iwork;
size_t safe_N = N;
- size_t alloc_size = safe_N * (safe_N + 1) * sizeof(@typ@);
+ size_t alloc_size = safe_N * (safe_N + 1) * sizeof(typ);
fortran_int lda = fortran_int_max(N, 1);
- mem_buff = malloc(alloc_size);
+ mem_buff = (npy_uint8 *)malloc(alloc_size);
if (!mem_buff) {
goto error;
}
a = mem_buff;
- w = mem_buff + safe_N * safe_N * sizeof(@typ@);
+ w = mem_buff + safe_N * safe_N * sizeof(typ);
- params->A = a;
- params->W = w;
+ params->A = (typ*)a;
+ params->W = (typ*)w;
params->RWORK = NULL; /* unused */
params->N = N;
params->LRWORK = 0; /* unused */
@@ -1266,7 +1309,7 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
/* Work size query */
{
- @typ@ query_work_size;
+ typ query_work_size;
fortran_int query_iwork_size;
params->LWORK = -1;
@@ -1274,7 +1317,7 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
params->WORK = &query_work_size;
params->IWORK = &query_iwork_size;
- if (call_@lapack_func@(params) != 0) {
+ if (call_evd(params) != 0) {
goto error;
}
@@ -1282,18 +1325,18 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
liwork = query_iwork_size;
}
- mem_buff2 = malloc(lwork*sizeof(@typ@) + liwork*sizeof(fortran_int));
+ mem_buff2 = (npy_uint8 *)malloc(lwork*sizeof(typ) + liwork*sizeof(fortran_int));
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
- iwork = mem_buff2 + lwork*sizeof(@typ@);
+ iwork = mem_buff2 + lwork*sizeof(typ);
params->LWORK = lwork;
- params->WORK = work;
+ params->WORK = (typ*)work;
params->LIWORK = liwork;
- params->IWORK = iwork;
+ params->IWORK = (fortran_int*)iwork;
return 1;
@@ -1305,40 +1348,44 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
return 0;
}
-/**end repeat**/
-/**begin repeat
- #TYPE = CFLOAT, CDOUBLE#
- #typ = npy_cfloat, npy_cdouble#
- #basetyp = npy_float, npy_double#
- #ftyp = fortran_complex, fortran_doublecomplex#
- #fbasetyp = fortran_real, fortran_doublereal#
- #lapack_func = cheevd, zheevd#
-*/
static NPY_INLINE fortran_int
-call_@lapack_func@(EIGH_PARAMS_t *params)
+call_evd(EIGH_PARAMS_t<npy_cfloat> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->JOBZ, &params->UPLO, &params->N,
- params->A, &params->LDA, params->W,
- params->WORK, &params->LWORK,
+ LAPACK(cheevd)(&params->JOBZ, &params->UPLO, &params->N,
+ (fortran_type_t<npy_cfloat>*)params->A, &params->LDA, params->W,
+ (fortran_type_t<npy_cfloat>*)params->WORK, &params->LWORK,
params->RWORK, &params->LRWORK,
params->IWORK, &params->LIWORK,
&rv);
return rv;
}
-/*
- * Initialize the parameters to use in for the lapack function _heev
- * Handles buffer allocation
- */
+static NPY_INLINE fortran_int
+call_evd(EIGH_PARAMS_t<npy_cdouble> *params)
+{
+ fortran_int rv;
+ LAPACK(zheevd)(&params->JOBZ, &params->UPLO, &params->N,
+ (fortran_type_t<npy_cdouble>*)params->A, &params->LDA, params->W,
+ (fortran_type_t<npy_cdouble>*)params->WORK, &params->LWORK,
+ params->RWORK, &params->LRWORK,
+ params->IWORK, &params->LIWORK,
+ &rv);
+ return rv;
+}
+
+template<typename typ>
static NPY_INLINE int
-init_@lapack_func@(EIGH_PARAMS_t *params,
+init_evd(EIGH_PARAMS_t<typ> *params,
char JOBZ,
char UPLO,
- fortran_int N)
+ fortran_int N, complex_trait)
{
+ using basetyp = basetype_t<typ>;
+using ftyp = fortran_type_t<typ>;
+using fbasetyp = fortran_type_t<basetyp>;
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
fortran_int lwork;
@@ -1348,16 +1395,16 @@ init_@lapack_func@(EIGH_PARAMS_t *params,
size_t safe_N = N;
fortran_int lda = fortran_int_max(N, 1);
- mem_buff = malloc(safe_N * safe_N * sizeof(@typ@) +
- safe_N * sizeof(@basetyp@));
+ mem_buff = (npy_uint8 *)malloc(safe_N * safe_N * sizeof(typ) +
+ safe_N * sizeof(basetyp));
if (!mem_buff) {
goto error;
}
a = mem_buff;
- w = mem_buff + safe_N * safe_N * sizeof(@typ@);
+ w = mem_buff + safe_N * safe_N * sizeof(typ);
- params->A = a;
- params->W = w;
+ params->A = (typ*)a;
+ params->W = (basetyp*)w;
params->N = N;
params->JOBZ = JOBZ;
params->UPLO = UPLO;
@@ -1365,40 +1412,40 @@ init_@lapack_func@(EIGH_PARAMS_t *params,
/* Work size query */
{
- @ftyp@ query_work_size;
- @fbasetyp@ query_rwork_size;
+ ftyp query_work_size;
+ fbasetyp query_rwork_size;
fortran_int query_iwork_size;
params->LWORK = -1;
params->LRWORK = -1;
params->LIWORK = -1;
- params->WORK = &query_work_size;
- params->RWORK = &query_rwork_size;
+ params->WORK = (typ*)&query_work_size;
+ params->RWORK = (basetyp*)&query_rwork_size;
params->IWORK = &query_iwork_size;
- if (call_@lapack_func@(params) != 0) {
+ if (call_evd(params) != 0) {
goto error;
}
- lwork = (fortran_int)*(@fbasetyp@*)&query_work_size;
+ lwork = (fortran_int)*(fbasetyp*)&query_work_size;
lrwork = (fortran_int)query_rwork_size;
liwork = query_iwork_size;
}
- mem_buff2 = malloc(lwork*sizeof(@typ@) +
- lrwork*sizeof(@basetyp@) +
+ mem_buff2 = (npy_uint8 *)malloc(lwork*sizeof(typ) +
+ lrwork*sizeof(basetyp) +
liwork*sizeof(fortran_int));
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
- rwork = work + lwork*sizeof(@typ@);
- iwork = rwork + lrwork*sizeof(@basetyp@);
+ rwork = work + lwork*sizeof(typ);
+ iwork = rwork + lrwork*sizeof(basetyp);
- params->WORK = work;
- params->RWORK = rwork;
- params->IWORK = iwork;
+ params->WORK = (typ*)work;
+ params->RWORK = (basetyp*)rwork;
+ params->IWORK = (fortran_int*)iwork;
params->LWORK = lwork;
params->LRWORK = lrwork;
params->LIWORK = liwork;
@@ -1413,16 +1460,7 @@ error:
return 0;
}
-/**end repeat**/
-
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #BASETYPE = FLOAT, DOUBLE, FLOAT, DOUBLE#
- #typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
- #basetyp = npy_float, npy_double, npy_float, npy_double#
- #lapack_func = ssyevd, dsyevd, cheevd, zheevd#
-**/
/*
* (M, M)->(M,)(M, M)
* dimensions[1] -> M
@@ -1431,8 +1469,9 @@ error:
* args[2] -> A[out]
*/
+template<typename typ>
static NPY_INLINE void
-release_@lapack_func@(EIGH_PARAMS_t *params)
+release_evd(EIGH_PARAMS_t<typ> *params)
{
/* allocated memory in A and WORK */
free(params->A);
@@ -1441,18 +1480,20 @@ release_@lapack_func@(EIGH_PARAMS_t *params)
}
+template<typename typ>
static NPY_INLINE void
-@TYPE@_eigh_wrapper(char JOBZ,
+eigh_wrapper(char JOBZ,
char UPLO,
char**args,
npy_intp const *dimensions,
npy_intp const *steps)
{
+ using basetyp = basetype_t<typ>;
ptrdiff_t outer_steps[3];
size_t iter;
size_t outer_dim = *dimensions++;
size_t op_count = (JOBZ=='N')?2:3;
- EIGH_PARAMS_t eigh_params;
+ EIGH_PARAMS_t<typ> eigh_params;
int error_occurred = get_fp_invalid_and_clear();
for (iter = 0; iter < op_count; ++iter) {
@@ -1460,10 +1501,10 @@ static NPY_INLINE void
}
steps += op_count;
- if (init_@lapack_func@(&eigh_params,
+ if (init_evd(&eigh_params,
JOBZ,
UPLO,
- (fortran_int)dimensions[0])) {
+ (fortran_int)dimensions[0], dispatch_scalar<typ>())) {
LINEARIZE_DATA_t matrix_in_ld;
LINEARIZE_DATA_t eigenvectors_out_ld;
LINEARIZE_DATA_t eigenvalues_out_ld;
@@ -1483,106 +1524,134 @@ static NPY_INLINE void
for (iter = 0; iter < outer_dim; ++iter) {
int not_ok;
/* copy the matrix in */
- linearize_@TYPE@_matrix(eigh_params.A, args[0], &matrix_in_ld);
- not_ok = call_@lapack_func@(&eigh_params);
+ linearize_matrix((typ*)eigh_params.A, (typ*)args[0], &matrix_in_ld);
+ not_ok = call_evd(&eigh_params);
if (!not_ok) {
/* lapack ok, copy result out */
- delinearize_@BASETYPE@_matrix(args[1],
- eigh_params.W,
+ delinearize_matrix((basetyp*)args[1],
+ (basetyp*)eigh_params.W,
&eigenvalues_out_ld);
if ('V' == eigh_params.JOBZ) {
- delinearize_@TYPE@_matrix(args[2],
- eigh_params.A,
+ delinearize_matrix((typ*)args[2],
+ (typ*)eigh_params.A,
&eigenvectors_out_ld);
}
} else {
/* lapack fail, set result to nan */
error_occurred = 1;
- nan_@BASETYPE@_matrix(args[1], &eigenvalues_out_ld);
+ nan_matrix((basetyp*)args[1], &eigenvalues_out_ld);
if ('V' == eigh_params.JOBZ) {
- nan_@TYPE@_matrix(args[2], &eigenvectors_out_ld);
+ nan_matrix((typ*)args[2], &eigenvectors_out_ld);
}
}
update_pointers((npy_uint8**)args, outer_steps, op_count);
}
- release_@lapack_func@(&eigh_params);
+ release_evd(&eigh_params);
}
set_fp_invalid_or_clear(error_occurred);
}
-/**end repeat**/
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- */
+template<typename typ>
static void
-@TYPE@_eighlo(char **args,
+eighlo(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
{
- @TYPE@_eigh_wrapper('V', 'L', args, dimensions, steps);
+ eigh_wrapper<typ>('V', 'L', args, dimensions, steps);
}
+template<typename typ>
static void
-@TYPE@_eighup(char **args,
+eighup(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void* NPY_UNUSED(func))
{
- @TYPE@_eigh_wrapper('V', 'U', args, dimensions, steps);
+ eigh_wrapper<typ>('V', 'U', args, dimensions, steps);
}
+template<typename typ>
static void
-@TYPE@_eigvalshlo(char **args,
+eigvalshlo(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void* NPY_UNUSED(func))
{
- @TYPE@_eigh_wrapper('N', 'L', args, dimensions, steps);
+ eigh_wrapper<typ>('N', 'L', args, dimensions, steps);
}
+template<typename typ>
static void
-@TYPE@_eigvalshup(char **args,
+eigvalshup(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void* NPY_UNUSED(func))
{
- @TYPE@_eigh_wrapper('N', 'U', args, dimensions, steps);
+ eigh_wrapper<typ>('N', 'U', args, dimensions, steps);
}
-/**end repeat**/
/* -------------------------------------------------------------------------- */
/* Solve family (includes inv) */
-typedef struct gesv_params_struct
+template<typename typ>
+struct GESV_PARAMS_t
{
- void *A; /* A is (N, N) of base type */
- void *B; /* B is (N, NRHS) of base type */
+ typ *A; /* A is (N, N) of base type */
+ typ *B; /* B is (N, NRHS) of base type */
fortran_int * IPIV; /* IPIV is (N) */
fortran_int N;
fortran_int NRHS;
fortran_int LDA;
fortran_int LDB;
-} GESV_PARAMS_t;
-
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
- #ftyp = fortran_real, fortran_doublereal,
- fortran_complex, fortran_doublecomplex#
- #lapack_func = sgesv, dgesv, cgesv, zgesv#
-*/
+};
+
+static NPY_INLINE fortran_int
+call_gesv(GESV_PARAMS_t<fortran_real> *params)
+{
+ fortran_int rv;
+ LAPACK(sgesv)(&params->N, &params->NRHS,
+ params->A, &params->LDA,
+ params->IPIV,
+ params->B, &params->LDB,
+ &rv);
+ return rv;
+}
+
+static NPY_INLINE fortran_int
+call_gesv(GESV_PARAMS_t<fortran_doublereal> *params)
+{
+ fortran_int rv;
+ LAPACK(dgesv)(&params->N, &params->NRHS,
+ params->A, &params->LDA,
+ params->IPIV,
+ params->B, &params->LDB,
+ &rv);
+ return rv;
+}
+
+static NPY_INLINE fortran_int
+call_gesv(GESV_PARAMS_t<fortran_complex> *params)
+{
+ fortran_int rv;
+ LAPACK(cgesv)(&params->N, &params->NRHS,
+ params->A, &params->LDA,
+ params->IPIV,
+ params->B, &params->LDB,
+ &rv);
+ return rv;
+}
static NPY_INLINE fortran_int
-call_@lapack_func@(GESV_PARAMS_t *params)
+call_gesv(GESV_PARAMS_t<fortran_doublecomplex> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->N, &params->NRHS,
+ LAPACK(zgesv)(&params->N, &params->NRHS,
params->A, &params->LDA,
params->IPIV,
params->B, &params->LDB,
@@ -1590,30 +1659,32 @@ call_@lapack_func@(GESV_PARAMS_t *params)
return rv;
}
+
/*
* Initialize the parameters to use in for the lapack function _heev
* Handles buffer allocation
*/
+template<typename ftyp>
static NPY_INLINE int
-init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS)
+init_gesv(GESV_PARAMS_t<ftyp> *params, fortran_int N, fortran_int NRHS)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *a, *b, *ipiv;
size_t safe_N = N;
size_t safe_NRHS = NRHS;
fortran_int ld = fortran_int_max(N, 1);
- mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@) +
- safe_N * safe_NRHS*sizeof(@ftyp@) +
+ mem_buff = (npy_uint8 *)malloc(safe_N * safe_N * sizeof(ftyp) +
+ safe_N * safe_NRHS*sizeof(ftyp) +
safe_N * sizeof(fortran_int));
if (!mem_buff) {
goto error;
}
a = mem_buff;
- b = a + safe_N * safe_N * sizeof(@ftyp@);
- ipiv = b + safe_N * safe_NRHS * sizeof(@ftyp@);
+ b = a + safe_N * safe_N * sizeof(ftyp);
+ ipiv = b + safe_N * safe_NRHS * sizeof(ftyp);
- params->A = a;
- params->B = b;
+ params->A = (ftyp*)a;
+ params->B = (ftyp*)b;
params->IPIV = (fortran_int*)ipiv;
params->N = N;
params->NRHS = NRHS;
@@ -1628,26 +1699,29 @@ init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS)
return 0;
}
+template<typename ftyp>
static NPY_INLINE void
-release_@lapack_func@(GESV_PARAMS_t *params)
+release_gesv(GESV_PARAMS_t<ftyp> *params)
{
/* memory block base is in A */
free(params->A);
memset(params, 0, sizeof(*params));
}
+template<typename typ>
static void
-@TYPE@_solve(char **args, npy_intp const *dimensions, npy_intp const *steps,
+solve(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- GESV_PARAMS_t params;
+using ftyp = fortran_type_t<typ>;
+ GESV_PARAMS_t<ftyp> params;
fortran_int n, nrhs;
int error_occurred = get_fp_invalid_and_clear();
INIT_OUTER_LOOP_3
n = (fortran_int)dimensions[0];
nrhs = (fortran_int)dimensions[1];
- if (init_@lapack_func@(&params, n, nrhs)) {
+ if (init_gesv(&params, n, nrhs)) {
LINEARIZE_DATA_t a_in, b_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
@@ -1656,34 +1730,37 @@ static void
BEGIN_OUTER_LOOP_3
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- linearize_@TYPE@_matrix(params.B, args[1], &b_in);
- not_ok =call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ linearize_matrix((typ*)params.B, (typ*)args[1], &b_in);
+ not_ok =call_gesv(&params);
if (!not_ok) {
- delinearize_@TYPE@_matrix(args[2], params.B, &r_out);
+ delinearize_matrix((typ*)args[2], (typ*)params.B, &r_out);
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[2], &r_out);
+ nan_matrix((typ*)args[2], &r_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_gesv(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
+
+template<typename typ>
static void
-@TYPE@_solve1(char **args, npy_intp const *dimensions, npy_intp const *steps,
+solve1(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- GESV_PARAMS_t params;
+using ftyp = fortran_type_t<typ>;
+ GESV_PARAMS_t<ftyp> params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n;
INIT_OUTER_LOOP_3
n = (fortran_int)dimensions[0];
- if (init_@lapack_func@(&params, n, 1)) {
+ if (init_gesv(&params, n, 1)) {
LINEARIZE_DATA_t a_in, b_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
init_linearize_data(&b_in, 1, n, 1, steps[2]);
@@ -1691,105 +1768,130 @@ static void
BEGIN_OUTER_LOOP_3
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- linearize_@TYPE@_matrix(params.B, args[1], &b_in);
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ linearize_matrix((typ*)params.B, (typ*)args[1], &b_in);
+ not_ok = call_gesv(&params);
if (!not_ok) {
- delinearize_@TYPE@_matrix(args[2], params.B, &r_out);
+ delinearize_matrix((typ*)args[2], (typ*)params.B, &r_out);
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[2], &r_out);
+ nan_matrix((typ*)args[2], &r_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_gesv(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
+template<typename typ>
static void
-@TYPE@_inv(char **args, npy_intp const *dimensions, npy_intp const *steps,
+inv(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- GESV_PARAMS_t params;
+using ftyp = fortran_type_t<typ>;
+ GESV_PARAMS_t<ftyp> params;
fortran_int n;
int error_occurred = get_fp_invalid_and_clear();
INIT_OUTER_LOOP_2
n = (fortran_int)dimensions[0];
- if (init_@lapack_func@(&params, n, n)) {
+ if (init_gesv(&params, n, n)) {
LINEARIZE_DATA_t a_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
init_linearize_data(&r_out, n, n, steps[3], steps[2]);
BEGIN_OUTER_LOOP_2
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- identity_@TYPE@_matrix(params.B, n);
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ identity_matrix((typ*)params.B, n);
+ not_ok = call_gesv(&params);
if (!not_ok) {
- delinearize_@TYPE@_matrix(args[1], params.B, &r_out);
+ delinearize_matrix((typ*)args[1], (typ*)params.B, &r_out);
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[1], &r_out);
+ nan_matrix((typ*)args[1], &r_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_gesv(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
-/**end repeat**/
-
/* -------------------------------------------------------------------------- */
/* Cholesky decomposition */
-typedef struct potr_params_struct
+template<typename typ>
+struct POTR_PARAMS_t
{
- void *A;
+ typ *A;
fortran_int N;
fortran_int LDA;
char UPLO;
-} POTR_PARAMS_t;
+};
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #ftyp = fortran_real, fortran_doublereal,
- fortran_complex, fortran_doublecomplex#
- #lapack_func = spotrf, dpotrf, cpotrf, zpotrf#
- */
+static NPY_INLINE fortran_int
+call_potrf(POTR_PARAMS_t<fortran_real> *params)
+{
+ fortran_int rv;
+ LAPACK(spotrf)(&params->UPLO,
+ &params->N, params->A, &params->LDA,
+ &rv);
+ return rv;
+}
+
+static NPY_INLINE fortran_int
+call_potrf(POTR_PARAMS_t<fortran_doublereal> *params)
+{
+ fortran_int rv;
+ LAPACK(dpotrf)(&params->UPLO,
+ &params->N, params->A, &params->LDA,
+ &rv);
+ return rv;
+}
static NPY_INLINE fortran_int
-call_@lapack_func@(POTR_PARAMS_t *params)
+call_potrf(POTR_PARAMS_t<fortran_complex> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->UPLO,
+ LAPACK(cpotrf)(&params->UPLO,
&params->N, params->A, &params->LDA,
&rv);
return rv;
}
+static NPY_INLINE fortran_int
+call_potrf(POTR_PARAMS_t<fortran_doublecomplex> *params)
+{
+ fortran_int rv;
+ LAPACK(zpotrf)(&params->UPLO,
+ &params->N, params->A, &params->LDA,
+ &rv);
+ return rv;
+}
+
+template<typename ftyp>
static NPY_INLINE int
-init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N)
+init_potrf(POTR_PARAMS_t<ftyp> *params, char UPLO, fortran_int N)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *a;
size_t safe_N = N;
fortran_int lda = fortran_int_max(N, 1);
- mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@));
+ mem_buff = (npy_uint8 *)malloc(safe_N * safe_N * sizeof(ftyp));
if (!mem_buff) {
goto error;
}
a = mem_buff;
- params->A = a;
+ params->A = (ftyp*)a;
params->N = N;
params->LDA = lda;
params->UPLO = UPLO;
@@ -1802,18 +1904,21 @@ init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N)
return 0;
}
+template<typename ftyp>
static NPY_INLINE void
-release_@lapack_func@(POTR_PARAMS_t *params)
+release_potrf(POTR_PARAMS_t<ftyp> *params)
{
/* memory block base in A */
free(params->A);
memset(params, 0, sizeof(*params));
}
+template<typename typ>
static void
-@TYPE@_cholesky(char uplo, char **args, npy_intp const *dimensions, npy_intp const *steps)
+cholesky(char uplo, char **args, npy_intp const *dimensions, npy_intp const *steps)
{
- POTR_PARAMS_t params;
+ using ftyp = fortran_type_t<typ>;
+ POTR_PARAMS_t<ftyp> params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n;
INIT_OUTER_LOOP_2
@@ -1821,50 +1926,50 @@ static void
assert(uplo == 'L');
n = (fortran_int)dimensions[0];
- if (init_@lapack_func@(&params, uplo, n)) {
+ if (init_potrf(&params, uplo, n)) {
LINEARIZE_DATA_t a_in, r_out;
init_linearize_data(&a_in, n, n, steps[1], steps[0]);
init_linearize_data(&r_out, n, n, steps[3], steps[2]);
BEGIN_OUTER_LOOP_2
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix(params.A, (ftyp*)args[0], &a_in);
+ not_ok = call_potrf(&params);
if (!not_ok) {
- triu_@TYPE@_matrix(params.A, params.N);
- delinearize_@TYPE@_matrix(args[1], params.A, &r_out);
+ triu_matrix(params.A, params.N);
+ delinearize_matrix((ftyp*)args[1], params.A, &r_out);
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[1], &r_out);
+ nan_matrix((ftyp*)args[1], &r_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_potrf(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
+template<typename typ>
static void
-@TYPE@_cholesky_lo(char **args, npy_intp const *dimensions, npy_intp const *steps,
+cholesky_lo(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- @TYPE@_cholesky('L', args, dimensions, steps);
+ cholesky<typ>('L', args, dimensions, steps);
}
-/**end repeat**/
-
/* -------------------------------------------------------------------------- */
/* eig family */
-typedef struct geev_params_struct {
- void *A;
- void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/
- void *WI;
- void *VLR; /* REAL VL buffers for _geev where _ is s, d */
- void *VRR; /* REAL VR buffers for _geev where _ is s, d */
- void *WORK;
- void *W; /* final w */
- void *VL; /* final vl */
- void *VR; /* final vr */
+template<typename typ>
+struct GEEV_PARAMS_t {
+ typ *A;
+ basetype_t<typ> *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/
+ typ *WI;
+ typ *VLR; /* REAL VL buffers for _geev where _ is s, d */
+ typ *VRR; /* REAL VR buffers for _geev where _ is s, d */
+ typ *WORK;
+ typ *W; /* final w */
+ typ *VL; /* final vl */
+ typ *VR; /* final vr */
fortran_int N;
fortran_int LDA;
@@ -1874,10 +1979,11 @@ typedef struct geev_params_struct {
char JOBVL;
char JOBVR;
-} GEEV_PARAMS_t;
+};
+template<typename typ>
static NPY_INLINE void
-dump_geev_params(const char *name, GEEV_PARAMS_t* params)
+dump_geev_params(const char *name, GEEV_PARAMS_t<typ>* params)
{
TRACE_TXT("\n%s\n"
@@ -1922,20 +2028,25 @@ dump_geev_params(const char *name, GEEV_PARAMS_t* params)
"JOBVR", params->JOBVR);
}
-/**begin repeat
- #TYPE = FLOAT, DOUBLE#
- #CTYPE = CFLOAT, CDOUBLE#
- #typ = float, double#
- #complextyp = COMPLEX_t, DOUBLECOMPLEX_t#
- #lapack_func = sgeev, dgeev#
- #zero = 0.0f, 0.0#
-*/
+static NPY_INLINE fortran_int
+call_geev(GEEV_PARAMS_t<float>* params)
+{
+ fortran_int rv;
+ LAPACK(sgeev)(&params->JOBVL, &params->JOBVR,
+ &params->N, params->A, &params->LDA,
+ params->WR, params->WI,
+ params->VLR, &params->LDVL,
+ params->VRR, &params->LDVR,
+ params->WORK, &params->LWORK,
+ &rv);
+ return rv;
+}
static NPY_INLINE fortran_int
-call_@lapack_func@(GEEV_PARAMS_t* params)
+call_geev(GEEV_PARAMS_t<double>* params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->JOBVL, &params->JOBVR,
+ LAPACK(dgeev)(&params->JOBVL, &params->JOBVR,
&params->N, params->A, &params->LDA,
params->WR, params->WI,
params->VLR, &params->LDVL,
@@ -1945,18 +2056,21 @@ call_@lapack_func@(GEEV_PARAMS_t* params)
return rv;
}
+
+template<typename typ>
static NPY_INLINE int
-init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n)
+init_geev(GEEV_PARAMS_t<typ> *params, char jobvl, char jobvr, fortran_int n,
+scalar_trait)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr;
size_t safe_n = n;
- size_t a_size = safe_n * safe_n * sizeof(@typ@);
- size_t wr_size = safe_n * sizeof(@typ@);
- size_t wi_size = safe_n * sizeof(@typ@);
- size_t vlr_size = jobvl=='V' ? safe_n * safe_n * sizeof(@typ@) : 0;
- size_t vrr_size = jobvr=='V' ? safe_n * safe_n * sizeof(@typ@) : 0;
+ size_t a_size = safe_n * safe_n * sizeof(typ);
+ size_t wr_size = safe_n * sizeof(typ);
+ size_t wi_size = safe_n * sizeof(typ);
+ size_t vlr_size = jobvl=='V' ? safe_n * safe_n * sizeof(typ) : 0;
+ size_t vrr_size = jobvr=='V' ? safe_n * safe_n * sizeof(typ) : 0;
size_t w_size = wr_size*2;
size_t vl_size = vlr_size*2;
size_t vr_size = vrr_size*2;
@@ -1964,7 +2078,7 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n)
fortran_int ld = fortran_int_max(n, 1);
/* allocate data for known sizes (all but work) */
- mem_buff = malloc(a_size + wr_size + wi_size +
+ mem_buff = (npy_uint8 *)malloc(a_size + wr_size + wi_size +
vlr_size + vrr_size +
w_size + vl_size + vr_size);
if (!mem_buff) {
@@ -1980,14 +2094,14 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n)
vl = w + w_size;
vr = vl + vl_size;
- params->A = a;
- params->WR = wr;
- params->WI = wi;
- params->VLR = vlr;
- params->VRR = vrr;
- params->W = w;
- params->VL = vl;
- params->VR = vr;
+ params->A = (typ*)a;
+ params->WR = (typ*)wr;
+ params->WI = (typ*)wi;
+ params->VLR = (typ*)vlr;
+ params->VRR = (typ*)vrr;
+ params->W = (typ*)w;
+ params->VL = (typ*)vl;
+ params->VR = (typ*)vr;
params->N = n;
params->LDA = ld;
params->LDVL = ld;
@@ -1997,26 +2111,26 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n)
/* Work size query */
{
- @typ@ work_size_query;
+ typ work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
- if (call_@lapack_func@(params) != 0) {
+ if (call_geev(params) != 0) {
goto error;
}
work_count = (size_t)work_size_query;
}
- mem_buff2 = malloc(work_count*sizeof(@typ@));
+ mem_buff2 = (npy_uint8 *)malloc(work_count*sizeof(typ));
if (!mem_buff2) {
goto error;
}
work = mem_buff2;
params->LWORK = (fortran_int)work_count;
- params->WORK = work;
+ params->WORK = (typ*)work;
return 1;
error:
@@ -2027,42 +2141,45 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n)
return 0;
}
+template<typename complextyp, typename typ>
static NPY_INLINE void
-mk_@TYPE@_complex_array_from_real(@complextyp@ *c, const @typ@ *re, size_t n)
+mk_complex_array_from_real(complextyp *c, const typ *re, size_t n)
{
size_t iter;
for (iter = 0; iter < n; ++iter) {
- c[iter].array[0] = re[iter];
- c[iter].array[1] = @zero@;
+ c[iter].r = re[iter];
+ c[iter].i = numeric_limits<typ>::zero;
}
}
+template<typename complextyp, typename typ>
static NPY_INLINE void
-mk_@TYPE@_complex_array(@complextyp@ *c,
- const @typ@ *re,
- const @typ@ *im,
+mk_complex_array(complextyp *c,
+ const typ *re,
+ const typ *im,
size_t n)
{
size_t iter;
for (iter = 0; iter < n; ++iter) {
- c[iter].array[0] = re[iter];
- c[iter].array[1] = im[iter];
+ c[iter].r = re[iter];
+ c[iter].i = im[iter];
}
}
+template<typename complextyp, typename typ>
static NPY_INLINE void
-mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c,
- const @typ@ *r,
+mk_complex_array_conjugate_pair(complextyp *c,
+ const typ *r,
size_t n)
{
size_t iter;
for (iter = 0; iter < n; ++iter) {
- @typ@ re = r[iter];
- @typ@ im = r[iter+n];
- c[iter].array[0] = re;
- c[iter].array[1] = im;
- c[iter+n].array[0] = re;
- c[iter+n].array[1] = -im;
+ typ re = r[iter];
+ typ im = r[iter+n];
+ c[iter].r = re;
+ c[iter].i = im;
+ c[iter+n].r = re;
+ c[iter+n].i = -im;
}
}
@@ -2073,24 +2190,25 @@ mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c,
* i is the eigenvalue imaginary part produced by sgeev/zgeev
* n is so that the order of the matrix is n by n
*/
+template<typename complextyp, typename typ>
static NPY_INLINE void
-mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c,
- const @typ@ *r,
- const @typ@ *i,
+mk_geev_complex_eigenvectors(complextyp *c,
+ const typ *r,
+ const typ *i,
size_t n)
{
size_t iter = 0;
while (iter < n)
{
- if (i[iter] == @zero@) {
+ if (i[iter] == numeric_limits<typ>::zero) {
/* eigenvalue was real, eigenvectors as well... */
- mk_@TYPE@_complex_array_from_real(c, r, n);
+ mk_complex_array_from_real(c, r, n);
c += n;
r += n;
iter ++;
} else {
/* eigenvalue was complex, generate a pair of eigenvectors */
- mk_@TYPE@_complex_array_conjugate_pair(c, r, n);
+ mk_complex_array_conjugate_pair(c, r, n);
c += 2*n;
r += 2*n;
iter += 2;
@@ -2099,43 +2217,49 @@ mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c,
}
+template<typename complextyp, typename typ>
static NPY_INLINE void
-process_@lapack_func@_results(GEEV_PARAMS_t *params)
+process_geev_results(GEEV_PARAMS_t<typ> *params, scalar_trait)
{
/* REAL versions of geev need the results to be translated
* into complex versions. This is the way to deal with imaginary
* results. In our gufuncs we will always return complex arrays!
*/
- mk_@TYPE@_complex_array(params->W, params->WR, params->WI, params->N);
+ mk_complex_array((complextyp*)params->W, (typ*)params->WR, (typ*)params->WI, params->N);
/* handle the eigenvectors */
if ('V' == params->JOBVL) {
- mk_@lapack_func@_complex_eigenvectors(params->VL, params->VLR,
- params->WI, params->N);
+ mk_geev_complex_eigenvectors((complextyp*)params->VL, (typ*)params->VLR,
+ (typ*)params->WI, params->N);
}
if ('V' == params->JOBVR) {
- mk_@lapack_func@_complex_eigenvectors(params->VR, params->VRR,
- params->WI, params->N);
+ mk_geev_complex_eigenvectors((complextyp*)params->VR, (typ*)params->VRR,
+ (typ*)params->WI, params->N);
}
}
-/**end repeat**/
-
-/**begin repeat
- #TYPE = CFLOAT, CDOUBLE#
- #typ = COMPLEX_t, DOUBLECOMPLEX_t#
- #ftyp = fortran_complex, fortran_doublecomplex#
- #realtyp = float, double#
- #lapack_func = cgeev, zgeev#
- */
+static NPY_INLINE fortran_int
+call_geev(GEEV_PARAMS_t<fortran_complex>* params)
+{
+ fortran_int rv;
+ LAPACK(cgeev)(&params->JOBVL, &params->JOBVR,
+ &params->N, params->A, &params->LDA,
+ params->W,
+ params->VL, &params->LDVL,
+ params->VR, &params->LDVR,
+ params->WORK, &params->LWORK,
+ params->WR, /* actually RWORK */
+ &rv);
+ return rv;
+}
static NPY_INLINE fortran_int
-call_@lapack_func@(GEEV_PARAMS_t* params)
+call_geev(GEEV_PARAMS_t<fortran_doublecomplex>* params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->JOBVL, &params->JOBVR,
+ LAPACK(zgeev)(&params->JOBVL, &params->JOBVR,
&params->N, params->A, &params->LDA,
params->W,
params->VL, &params->LDVL,
@@ -2146,26 +2270,28 @@ call_@lapack_func@(GEEV_PARAMS_t* params)
return rv;
}
+template<typename ftyp>
static NPY_INLINE int
-init_@lapack_func@(GEEV_PARAMS_t* params,
+init_geev(GEEV_PARAMS_t<ftyp>* params,
char jobvl,
char jobvr,
- fortran_int n)
+ fortran_int n, complex_trait)
{
+using realtyp = basetype_t<ftyp>;
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *w, *vl, *vr, *work, *rwork;
size_t safe_n = n;
- size_t a_size = safe_n * safe_n * sizeof(@ftyp@);
- size_t w_size = safe_n * sizeof(@ftyp@);
- size_t vl_size = jobvl=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0;
- size_t vr_size = jobvr=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0;
- size_t rwork_size = 2 * safe_n * sizeof(@realtyp@);
+ size_t a_size = safe_n * safe_n * sizeof(ftyp);
+ size_t w_size = safe_n * sizeof(ftyp);
+ size_t vl_size = jobvl=='V'? safe_n * safe_n * sizeof(ftyp) : 0;
+ size_t vr_size = jobvr=='V'? safe_n * safe_n * sizeof(ftyp) : 0;
+ size_t rwork_size = 2 * safe_n * sizeof(realtyp);
size_t work_count = 0;
size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size;
fortran_int ld = fortran_int_max(n, 1);
- mem_buff = malloc(total_size);
+ mem_buff = (npy_uint8 *)malloc(total_size);
if (!mem_buff) {
goto error;
}
@@ -2176,14 +2302,14 @@ init_@lapack_func@(GEEV_PARAMS_t* params,
vr = vl + vl_size;
rwork = vr + vr_size;
- params->A = a;
- params->WR = rwork;
+ params->A = (ftyp*)a;
+ params->WR = (realtyp*)rwork;
params->WI = NULL;
params->VLR = NULL;
params->VRR = NULL;
- params->VL = vl;
- params->VR = vr;
- params->W = w;
+ params->VL = (ftyp*)vl;
+ params->VR = (ftyp*)vr;
+ params->W = (ftyp*)w;
params->N = n;
params->LDA = ld;
params->LDVL = ld;
@@ -2193,21 +2319,21 @@ init_@lapack_func@(GEEV_PARAMS_t* params,
/* Work size query */
{
- @typ@ work_size_query;
+ ftyp work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
- if (call_@lapack_func@(params) != 0) {
+ if (call_geev(params) != 0) {
goto error;
}
- work_count = (size_t) work_size_query.array[0];
+ work_count = (size_t) work_size_query.r;
/* Fix a bug in lapack 3.0.0 */
if(work_count == 0) work_count = 1;
}
- mem_buff2 = malloc(work_count*sizeof(@ftyp@));
+ mem_buff2 = (npy_uint8 *)malloc(work_count*sizeof(ftyp));
if (!mem_buff2) {
goto error;
}
@@ -2215,7 +2341,7 @@ init_@lapack_func@(GEEV_PARAMS_t* params,
work = mem_buff2;
params->LWORK = (fortran_int)work_count;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
return 1;
error:
@@ -2226,32 +2352,27 @@ init_@lapack_func@(GEEV_PARAMS_t* params,
return 0;
}
-
+template<typename complextyp, typename typ>
static NPY_INLINE void
-process_@lapack_func@_results(GEEV_PARAMS_t *NPY_UNUSED(params))
+process_geev_results(GEEV_PARAMS_t<typ> *NPY_UNUSED(params), complex_trait)
{
/* nothing to do here, complex versions are ready to copy out */
}
-/**end repeat**/
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CDOUBLE#
- #COMPLEXTYPE = CFLOAT, CDOUBLE, CDOUBLE#
- #ftype = fortran_real, fortran_doublereal, fortran_doublecomplex#
- #lapack_func = sgeev, dgeev, zgeev#
- */
+template<typename typ>
static NPY_INLINE void
-release_@lapack_func@(GEEV_PARAMS_t *params)
+release_geev(GEEV_PARAMS_t<typ> *params)
{
free(params->WORK);
free(params->A);
memset(params, 0, sizeof(*params));
}
+template<typename fctype, typename ftype>
static NPY_INLINE void
-@TYPE@_eig_wrapper(char JOBVL,
+eig_wrapper(char JOBVL,
char JOBVR,
char**args,
npy_intp const *dimensions,
@@ -2262,7 +2383,7 @@ static NPY_INLINE void
size_t outer_dim = *dimensions++;
size_t op_count = 2;
int error_occurred = get_fp_invalid_and_clear();
- GEEV_PARAMS_t geev_params;
+ GEEV_PARAMS_t<ftype> geev_params;
assert(JOBVL == 'N');
@@ -2275,9 +2396,9 @@ static NPY_INLINE void
}
steps += op_count;
- if (init_@lapack_func@(&geev_params,
+ if (init_geev(&geev_params,
JOBVL, JOBVR,
- (fortran_int)dimensions[0])) {
+ (fortran_int)dimensions[0], dispatch_scalar<ftype>())) {
LINEARIZE_DATA_t a_in;
LINEARIZE_DATA_t w_out;
LINEARIZE_DATA_t vl_out;
@@ -2307,78 +2428,81 @@ static NPY_INLINE void
int not_ok;
char **arg_iter = args;
/* copy the matrix in */
- linearize_@TYPE@_matrix(geev_params.A, *arg_iter++, &a_in);
- not_ok = call_@lapack_func@(&geev_params);
+ linearize_matrix((ftype*)geev_params.A, (ftype*)*arg_iter++, &a_in);
+ not_ok = call_geev(&geev_params);
if (!not_ok) {
- process_@lapack_func@_results(&geev_params);
- delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
- geev_params.W,
+ process_geev_results<fctype>(&geev_params,
+dispatch_scalar<ftype>{});
+ delinearize_matrix((fctype*)*arg_iter++,
+ (fctype*)geev_params.W,
&w_out);
if ('V' == geev_params.JOBVL) {
- delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
- geev_params.VL,
+ delinearize_matrix((fctype*)*arg_iter++,
+ (fctype*)geev_params.VL,
&vl_out);
}
if ('V' == geev_params.JOBVR) {
- delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
- geev_params.VR,
+ delinearize_matrix((fctype*)*arg_iter++,
+ (fctype*)geev_params.VR,
&vr_out);
}
} else {
/* geev failed */
error_occurred = 1;
- nan_@COMPLEXTYPE@_matrix(*arg_iter++, &w_out);
+ nan_matrix((fctype*)*arg_iter++, &w_out);
if ('V' == geev_params.JOBVL) {
- nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vl_out);
+ nan_matrix((fctype*)*arg_iter++, &vl_out);
}
if ('V' == geev_params.JOBVR) {
- nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vr_out);
+ nan_matrix((fctype*)*arg_iter++, &vr_out);
}
}
update_pointers((npy_uint8**)args, outer_steps, op_count);
}
- release_@lapack_func@(&geev_params);
+ release_geev(&geev_params);
}
set_fp_invalid_or_clear(error_occurred);
}
+template<typename fctype, typename ftype>
static void
-@TYPE@_eig(char **args,
+eig(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
{
- @TYPE@_eig_wrapper('N', 'V', args, dimensions, steps);
+ eig_wrapper<fctype, ftype>('N', 'V', args, dimensions, steps);
}
+template<typename fctype, typename ftype>
static void
-@TYPE@_eigvals(char **args,
+eigvals(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
{
- @TYPE@_eig_wrapper('N', 'N', args, dimensions, steps);
+ eig_wrapper<fctype, ftype>('N', 'N', args, dimensions, steps);
}
-/**end repeat**/
/* -------------------------------------------------------------------------- */
/* singular value decomposition */
-typedef struct gessd_params_struct
+template<typename ftyp>
+struct GESDD_PARAMS_t
{
- void *A;
- void *S;
- void *U;
- void *VT;
- void *WORK;
- void *RWORK;
- void *IWORK;
+ ftyp *A;
+ basetype_t<ftyp> *S;
+ ftyp *U;
+ ftyp *VT;
+ ftyp *WORK;
+ basetype_t<ftyp> *RWORK;
+ fortran_int *IWORK;
fortran_int M;
fortran_int N;
@@ -2387,12 +2511,13 @@ typedef struct gessd_params_struct
fortran_int LDVT;
fortran_int LWORK;
char JOBZ;
-} GESDD_PARAMS_t;
+} ;
+template<typename ftyp>
static NPY_INLINE void
dump_gesdd_params(const char *name,
- GESDD_PARAMS_t *params)
+ GESDD_PARAMS_t<ftyp> *params)
{
TRACE_TXT("\n%s:\n"\
@@ -2462,43 +2587,51 @@ compute_urows_vtcolumns(char jobz,
return 1;
}
-
-/**begin repeat
- #TYPE = FLOAT, DOUBLE#
- #lapack_func = sgesdd, dgesdd#
- #ftyp = fortran_real, fortran_doublereal#
- */
-
static NPY_INLINE fortran_int
-call_@lapack_func@(GESDD_PARAMS_t *params)
+call_gesdd(GESDD_PARAMS_t<fortran_real> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->JOBZ, &params->M, &params->N,
+ LAPACK(sgesdd)(&params->JOBZ, &params->M, &params->N,
params->A, &params->LDA,
params->S,
params->U, &params->LDU,
params->VT, &params->LDVT,
params->WORK, &params->LWORK,
- params->IWORK,
+ (fortran_int*)params->IWORK,
+ &rv);
+ return rv;
+}
+static NPY_INLINE fortran_int
+call_gesdd(GESDD_PARAMS_t<fortran_doublereal> *params)
+{
+ fortran_int rv;
+ LAPACK(dgesdd)(&params->JOBZ, &params->M, &params->N,
+ params->A, &params->LDA,
+ params->S,
+ params->U, &params->LDU,
+ params->VT, &params->LDVT,
+ params->WORK, &params->LWORK,
+ (fortran_int*)params->IWORK,
&rv);
return rv;
}
+template<typename ftyp>
static NPY_INLINE int
-init_@lapack_func@(GESDD_PARAMS_t *params,
+init_gesdd(GESDD_PARAMS_t<ftyp> *params,
char jobz,
fortran_int m,
- fortran_int n)
+ fortran_int n, scalar_trait)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *s, *u, *vt, *work, *iwork;
size_t safe_m = m;
size_t safe_n = n;
- size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
+ size_t a_size = safe_m * safe_n * sizeof(ftyp);
fortran_int min_m_n = fortran_int_min(m, n);
size_t safe_min_m_n = min_m_n;
- size_t s_size = safe_min_m_n * sizeof(@ftyp@);
+ size_t s_size = safe_min_m_n * sizeof(ftyp);
fortran_int u_row_count, vt_column_count;
size_t safe_u_row_count, safe_vt_column_count;
size_t u_size, vt_size;
@@ -2514,10 +2647,10 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
safe_u_row_count = u_row_count;
safe_vt_column_count = vt_column_count;
- u_size = safe_u_row_count * safe_m * sizeof(@ftyp@);
- vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@);
+ u_size = safe_u_row_count * safe_m * sizeof(ftyp);
+ vt_size = safe_n * safe_vt_column_count * sizeof(ftyp);
- mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size);
+ mem_buff = (npy_uint8 *)malloc(a_size + s_size + u_size + vt_size + iwork_size);
if (!mem_buff) {
goto error;
@@ -2534,12 +2667,12 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
params->M = m;
params->N = n;
- params->A = a;
- params->S = s;
- params->U = u;
- params->VT = vt;
+ params->A = (ftyp*)a;
+ params->S = (ftyp*)s;
+ params->U = (ftyp*)u;
+ params->VT = (ftyp*)vt;
params->RWORK = NULL;
- params->IWORK = iwork;
+ params->IWORK = (fortran_int*)iwork;
params->LDA = ld;
params->LDU = ld;
params->LDVT = vt_column_count;
@@ -2547,22 +2680,22 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
/* Work size query */
{
- @ftyp@ work_size_query;
+ ftyp work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
- if (call_@lapack_func@(params) != 0) {
+ if (call_gesdd(params) != 0) {
goto error;
}
work_count = (fortran_int)work_size_query;
/* Fix a bug in lapack 3.0.0 */
if(work_count == 0) work_count = 1;
- work_size = (size_t)work_count * sizeof(@ftyp@);
+ work_size = (size_t)work_count * sizeof(ftyp);
}
- mem_buff2 = malloc(work_size);
+ mem_buff2 = (npy_uint8 *)malloc(work_size);
if (!mem_buff2) {
goto error;
}
@@ -2570,7 +2703,7 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
work = mem_buff2;
params->LWORK = work_count;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
return 1;
error:
@@ -2582,21 +2715,26 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-
-/**begin repeat
- #TYPE = CFLOAT, CDOUBLE#
- #ftyp = fortran_complex, fortran_doublecomplex#
- #frealtyp = fortran_real, fortran_doublereal#
- #typ = COMPLEX_t, DOUBLECOMPLEX_t#
- #lapack_func = cgesdd, zgesdd#
- */
-
static NPY_INLINE fortran_int
-call_@lapack_func@(GESDD_PARAMS_t *params)
+call_gesdd(GESDD_PARAMS_t<fortran_complex> *params)
+{
+ fortran_int rv;
+ LAPACK(cgesdd)(&params->JOBZ, &params->M, &params->N,
+ params->A, &params->LDA,
+ params->S,
+ params->U, &params->LDU,
+ params->VT, &params->LDVT,
+ params->WORK, &params->LWORK,
+ params->RWORK,
+ params->IWORK,
+ &rv);
+ return rv;
+}
+static NPY_INLINE fortran_int
+call_gesdd(GESDD_PARAMS_t<fortran_doublecomplex> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->JOBZ, &params->M, &params->N,
+ LAPACK(zgesdd)(&params->JOBZ, &params->M, &params->N,
params->A, &params->LDA,
params->S,
params->U, &params->LDU,
@@ -2608,12 +2746,14 @@ call_@lapack_func@(GESDD_PARAMS_t *params)
return rv;
}
+template<typename ftyp>
static NPY_INLINE int
-init_@lapack_func@(GESDD_PARAMS_t *params,
+init_gesdd(GESDD_PARAMS_t<ftyp> *params,
char jobz,
fortran_int m,
- fortran_int n)
+ fortran_int n, complex_trait)
{
+using frealtyp = basetype_t<ftyp>;
npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL;
npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork;
size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size;
@@ -2632,17 +2772,17 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
safe_u_row_count = u_row_count;
safe_vt_column_count = vt_column_count;
- a_size = safe_m * safe_n * sizeof(@ftyp@);
- s_size = safe_min_m_n * sizeof(@frealtyp@);
- u_size = safe_u_row_count * safe_m * sizeof(@ftyp@);
- vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@);
+ a_size = safe_m * safe_n * sizeof(ftyp);
+ s_size = safe_min_m_n * sizeof(frealtyp);
+ u_size = safe_u_row_count * safe_m * sizeof(ftyp);
+ vt_size = safe_n * safe_vt_column_count * sizeof(ftyp);
rwork_size = 'N'==jobz?
(7 * safe_min_m_n) :
(5*safe_min_m_n * safe_min_m_n + 5*safe_min_m_n);
- rwork_size *= sizeof(@ftyp@);
+ rwork_size *= sizeof(ftyp);
iwork_size = 8 * safe_min_m_n* sizeof(fortran_int);
- mem_buff = malloc(a_size +
+ mem_buff = (npy_uint8 *)malloc(a_size +
s_size +
u_size +
vt_size +
@@ -2662,12 +2802,12 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
/* fix vt_column_count so that it is a valid lapack parameter (0 is not) */
vt_column_count = fortran_int_max(1, vt_column_count);
- params->A = a;
- params->S = s;
- params->U = u;
- params->VT = vt;
- params->RWORK = rwork;
- params->IWORK = iwork;
+ params->A = (ftyp*)a;
+ params->S = (frealtyp*)s;
+ params->U = (ftyp*)u;
+ params->VT = (ftyp*)vt;
+ params->RWORK = (frealtyp*)rwork;
+ params->IWORK = (fortran_int*)iwork;
params->M = m;
params->N = n;
params->LDA = ld;
@@ -2677,22 +2817,22 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
/* Work size query */
{
- @ftyp@ work_size_query;
+ ftyp work_size_query;
params->LWORK = -1;
params->WORK = &work_size_query;
- if (call_@lapack_func@(params) != 0) {
+ if (call_gesdd(params) != 0) {
goto error;
}
- work_count = (fortran_int)((@typ@*)&work_size_query)->array[0];
+ work_count = (fortran_int)(*(frealtyp*)&work_size_query);
/* Fix a bug in lapack 3.0.0 */
if(work_count == 0) work_count = 1;
- work_size = (size_t)work_count * sizeof(@ftyp@);
+ work_size = (size_t)work_count * sizeof(ftyp);
}
- mem_buff2 = malloc(work_size);
+ mem_buff2 = (npy_uint8 *)malloc(work_size);
if (!mem_buff2) {
goto error;
}
@@ -2700,7 +2840,7 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
work = mem_buff2;
params->LWORK = work_count;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
return 1;
error:
@@ -2711,16 +2851,10 @@ init_@lapack_func@(GESDD_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- #REALTYPE = FLOAT, DOUBLE, FLOAT, DOUBLE#
- #lapack_func = sgesdd, dgesdd, cgesdd, zgesdd#
- */
+template<typename typ>
static NPY_INLINE void
-release_@lapack_func@(GESDD_PARAMS_t* params)
+release_gesdd(GESDD_PARAMS_t<typ>* params)
{
/* A and WORK contain allocated blocks */
free(params->A);
@@ -2728,28 +2862,31 @@ release_@lapack_func@(GESDD_PARAMS_t* params)
memset(params, 0, sizeof(*params));
}
+template<typename typ>
static NPY_INLINE void
-@TYPE@_svd_wrapper(char JOBZ,
+svd_wrapper(char JOBZ,
char **args,
npy_intp const *dimensions,
npy_intp const *steps)
{
+using basetyp = basetype_t<typ>;
ptrdiff_t outer_steps[4];
int error_occurred = get_fp_invalid_and_clear();
size_t iter;
size_t outer_dim = *dimensions++;
size_t op_count = (JOBZ=='N')?2:4;
- GESDD_PARAMS_t params;
+ GESDD_PARAMS_t<typ> params;
for (iter = 0; iter < op_count; ++iter) {
outer_steps[iter] = (ptrdiff_t) steps[iter];
}
steps += op_count;
- if (init_@lapack_func@(&params,
- JOBZ,
- (fortran_int)dimensions[0],
- (fortran_int)dimensions[1])) {
+ if (init_gesdd(&params,
+ JOBZ,
+ (fortran_int)dimensions[0],
+ (fortran_int)dimensions[1],
+dispatch_scalar<typ>())) {
LINEARIZE_DATA_t a_in, u_out, s_out, v_out;
fortran_int min_m_n = params.M < params.N ? params.M : params.N;
@@ -2780,97 +2917,95 @@ static NPY_INLINE void
for (iter = 0; iter < outer_dim; ++iter) {
int not_ok;
/* copy the matrix in */
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ not_ok = call_gesdd(&params);
if (!not_ok) {
if ('N' == params.JOBZ) {
- delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out);
+ delinearize_matrix((basetyp*)args[1], (basetyp*)params.S, &s_out);
} else {
if ('A' == params.JOBZ && min_m_n == 0) {
/* Lapack has betrayed us and left these uninitialized,
* so produce an identity matrix for whichever of u
* and v is not empty.
*/
- identity_@TYPE@_matrix(params.U, params.M);
- identity_@TYPE@_matrix(params.VT, params.N);
+ identity_matrix((typ*)params.U, params.M);
+ identity_matrix((typ*)params.VT, params.N);
}
- delinearize_@TYPE@_matrix(args[1], params.U, &u_out);
- delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out);
- delinearize_@TYPE@_matrix(args[3], params.VT, &v_out);
+ delinearize_matrix((typ*)args[1], (typ*)params.U, &u_out);
+ delinearize_matrix((basetyp*)args[2], (basetyp*)params.S, &s_out);
+ delinearize_matrix((typ*)args[3], (typ*)params.VT, &v_out);
}
} else {
error_occurred = 1;
if ('N' == params.JOBZ) {
- nan_@REALTYPE@_matrix(args[1], &s_out);
+ nan_matrix((basetyp*)args[1], &s_out);
} else {
- nan_@TYPE@_matrix(args[1], &u_out);
- nan_@REALTYPE@_matrix(args[2], &s_out);
- nan_@TYPE@_matrix(args[3], &v_out);
+ nan_matrix((typ*)args[1], &u_out);
+ nan_matrix((basetyp*)args[2], &s_out);
+ nan_matrix((typ*)args[3], &v_out);
}
}
update_pointers((npy_uint8**)args, outer_steps, op_count);
}
- release_@lapack_func@(&params);
+ release_gesdd(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
-/**end repeat*/
-/* svd gufunc entry points */
-/**begin repeat
- #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE#
- */
+template<typename typ>
static void
-@TYPE@_svd_N(char **args,
+svd_N(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
{
- @TYPE@_svd_wrapper('N', args, dimensions, steps);
+ svd_wrapper<fortran_type_t<typ>>('N', args, dimensions, steps);
}
+template<typename typ>
static void
-@TYPE@_svd_S(char **args,
+svd_S(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
{
- @TYPE@_svd_wrapper('S', args, dimensions, steps);
+ svd_wrapper<fortran_type_t<typ>>('S', args, dimensions, steps);
}
+template<typename typ>
static void
-@TYPE@_svd_A(char **args,
+svd_A(char **args,
npy_intp const *dimensions,
npy_intp const *steps,
void *NPY_UNUSED(func))
{
- @TYPE@_svd_wrapper('A', args, dimensions, steps);
+ svd_wrapper<fortran_type_t<typ>>('A', args, dimensions, steps);
}
-/**end repeat**/
-
/* -------------------------------------------------------------------------- */
/* qr (modes - r, raw) */
-typedef struct geqfr_params_struct
+template<typename typ>
+struct GEQRF_PARAMS_t
{
fortran_int M;
fortran_int N;
- void *A;
+ typ *A;
fortran_int LDA;
- void* TAU;
- void *WORK;
+ typ* TAU;
+ typ *WORK;
fortran_int LWORK;
-} GEQRF_PARAMS_t;
+};
+template<typename typ>
static inline void
dump_geqrf_params(const char *name,
- GEQRF_PARAMS_t *params)
+ GEQRF_PARAMS_t<typ> *params)
{
TRACE_TXT("\n%s:\n"\
@@ -2894,15 +3029,22 @@ dump_geqrf_params(const char *name,
"LWORK", (int)params->LWORK);
}
-/**begin repeat
- #lapack_func=dgeqrf,zgeqrf#
- */
-
static inline fortran_int
-call_@lapack_func@(GEQRF_PARAMS_t *params)
+call_geqrf(GEQRF_PARAMS_t<double> *params)
+{
+ fortran_int rv;
+ LAPACK(dgeqrf)(&params->M, &params->N,
+ params->A, &params->LDA,
+ params->TAU,
+ params->WORK, &params->LWORK,
+ &rv);
+ return rv;
+}
+static inline fortran_int
+call_geqrf(GEQRF_PARAMS_t<f2c_doublecomplex> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->M, &params->N,
+ LAPACK(zgeqrf)(&params->M, &params->N,
params->A, &params->LDA,
params->TAU,
params->WORK, &params->LWORK,
@@ -2910,18 +3052,13 @@ call_@lapack_func@(GEQRF_PARAMS_t *params)
return rv;
}
-/**end repeat**/
-/**begin repeat
- #TYPE=DOUBLE#
- #lapack_func=dgeqrf#
- #ftyp=fortran_doublereal#
- */
static inline int
-init_@lapack_func@(GEQRF_PARAMS_t *params,
+init_geqrf(GEQRF_PARAMS_t<fortran_doublereal> *params,
fortran_int m,
fortran_int n)
{
+using ftyp = fortran_doublereal;
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *tau, *work;
@@ -2930,14 +3067,14 @@ init_@lapack_func@(GEQRF_PARAMS_t *params,
size_t safe_m = m;
size_t safe_n = n;
- size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
- size_t tau_size = safe_min_m_n * sizeof(@ftyp@);
+ size_t a_size = safe_m * safe_n * sizeof(ftyp);
+ size_t tau_size = safe_min_m_n * sizeof(ftyp);
fortran_int work_count;
size_t work_size;
fortran_int lda = fortran_int_max(1, m);
- mem_buff = malloc(a_size + tau_size);
+ mem_buff = (npy_uint8 *)malloc(a_size + tau_size);
if (!mem_buff)
goto error;
@@ -2949,35 +3086,35 @@ init_@lapack_func@(GEQRF_PARAMS_t *params,
params->M = m;
params->N = n;
- params->A = a;
- params->TAU = tau;
+ params->A = (ftyp*)a;
+ params->TAU = (ftyp*)tau;
params->LDA = lda;
{
/* compute optimal work size */
- @ftyp@ work_size_query;
+ ftyp work_size_query;
params->WORK = &work_size_query;
params->LWORK = -1;
- if (call_@lapack_func@(params) != 0)
+ if (call_geqrf(params) != 0)
goto error;
- work_count = (fortran_int) *(@ftyp@*) params->WORK;
+ work_count = (fortran_int) *(ftyp*) params->WORK;
}
params->LWORK = fortran_int_max(fortran_int_max(1, n), work_count);
- work_size = (size_t) params->LWORK * sizeof(@ftyp@);
- mem_buff2 = malloc(work_size);
+ work_size = (size_t) params->LWORK * sizeof(ftyp);
+ mem_buff2 = (npy_uint8 *)malloc(work_size);
if (!mem_buff2)
goto error;
work = mem_buff2;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
return 1;
error:
@@ -2989,18 +3126,13 @@ init_@lapack_func@(GEQRF_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-/**begin repeat
- #TYPE=CDOUBLE#
- #lapack_func=zgeqrf#
- #ftyp=fortran_doublecomplex#
- */
static inline int
-init_@lapack_func@(GEQRF_PARAMS_t *params,
+init_geqrf(GEQRF_PARAMS_t<fortran_doublecomplex> *params,
fortran_int m,
fortran_int n)
{
+using ftyp = fortran_doublecomplex;
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *tau, *work;
@@ -3009,14 +3141,14 @@ init_@lapack_func@(GEQRF_PARAMS_t *params,
size_t safe_m = m;
size_t safe_n = n;
- size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
- size_t tau_size = safe_min_m_n * sizeof(@ftyp@);
+ size_t a_size = safe_m * safe_n * sizeof(ftyp);
+ size_t tau_size = safe_min_m_n * sizeof(ftyp);
fortran_int work_count;
size_t work_size;
fortran_int lda = fortran_int_max(1, m);
- mem_buff = malloc(a_size + tau_size);
+ mem_buff = (npy_uint8 *)malloc(a_size + tau_size);
if (!mem_buff)
goto error;
@@ -3028,37 +3160,37 @@ init_@lapack_func@(GEQRF_PARAMS_t *params,
params->M = m;
params->N = n;
- params->A = a;
- params->TAU = tau;
+ params->A = (ftyp*)a;
+ params->TAU = (ftyp*)tau;
params->LDA = lda;
{
/* compute optimal work size */
- @ftyp@ work_size_query;
+ ftyp work_size_query;
params->WORK = &work_size_query;
params->LWORK = -1;
- if (call_@lapack_func@(params) != 0)
+ if (call_geqrf(params) != 0)
goto error;
- work_count = (fortran_int) ((@ftyp@*)params->WORK)->r;
+ work_count = (fortran_int) ((ftyp*)params->WORK)->r;
}
params->LWORK = fortran_int_max(fortran_int_max(1, n),
work_count);
- work_size = (size_t) params->LWORK * sizeof(@ftyp@);
+ work_size = (size_t) params->LWORK * sizeof(ftyp);
- mem_buff2 = malloc(work_size);
+ mem_buff2 = (npy_uint8 *)malloc(work_size);
if (!mem_buff2)
goto error;
work = mem_buff2;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
return 1;
error:
@@ -3070,13 +3202,10 @@ init_@lapack_func@(GEQRF_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-/**begin repeat
- #lapack_func=dgeqrf,zgeqrf#
- */
+template<typename ftyp>
static inline void
-release_@lapack_func@(GEQRF_PARAMS_t* params)
+release_geqrf(GEQRF_PARAMS_t<ftyp>* params)
{
/* A and WORK contain allocated blocks */
free(params->A);
@@ -3084,22 +3213,14 @@ release_@lapack_func@(GEQRF_PARAMS_t* params)
memset(params, 0, sizeof(*params));
}
-/**end repeat**/
-
-/**begin repeat
- #TYPE=DOUBLE,CDOUBLE#
- #REALTYPE=DOUBLE,DOUBLE#
- #lapack_func=dgeqrf,zgeqrf#
- #typ = npy_double,npy_cdouble#
- #basetyp = npy_double,npy_double#
- #ftyp = fortran_doublereal,fortran_doublecomplex#
- #cmplx = 0, 1#
- */
+template<typename typ>
static void
-@TYPE@_qr_r_raw(char **args, npy_intp const *dimensions, npy_intp const *steps,
+qr_r_raw(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- GEQRF_PARAMS_t params;
+using ftyp = fortran_type_t<typ>;
+
+ GEQRF_PARAMS_t<ftyp> params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n, m;
@@ -3108,7 +3229,7 @@ static void
m = (fortran_int)dimensions[0];
n = (fortran_int)dimensions[1];
- if (init_@lapack_func@(&params, m, n)) {
+ if (init_geqrf(&params, m, n)) {
LINEARIZE_DATA_t a_in, tau_out;
init_linearize_data(&a_in, n, m, steps[1], steps[0]);
@@ -3116,50 +3237,57 @@ static void
BEGIN_OUTER_LOOP_2
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ not_ok = call_geqrf(&params);
if (!not_ok) {
- delinearize_@TYPE@_matrix(args[0], params.A, &a_in);
- delinearize_@TYPE@_matrix(args[1], params.TAU, &tau_out);
+ delinearize_matrix((typ*)args[0], (typ*)params.A, &a_in);
+ delinearize_matrix((typ*)args[1], (typ*)params.TAU, &tau_out);
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[1], &tau_out);
+ nan_matrix((typ*)args[1], &tau_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_geqrf(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
-/**end repeat**/
/* -------------------------------------------------------------------------- */
/* qr common code (modes - reduced and complete) */
-typedef struct gqr_params_struct
+template<typename typ>
+struct GQR_PARAMS_t
{
fortran_int M;
fortran_int MC;
fortran_int MN;
void* A;
- void *Q;
+ typ *Q;
fortran_int LDA;
- void* TAU;
- void *WORK;
+ typ* TAU;
+ typ *WORK;
fortran_int LWORK;
-} GQR_PARAMS_t;
-
-/**begin repeat
- #lapack_func=dorgqr,zungqr#
- */
+} ;
static inline fortran_int
-call_@lapack_func@(GQR_PARAMS_t *params)
+call_gqr(GQR_PARAMS_t<double> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->M, &params->MC, &params->MN,
+ LAPACK(dorgqr)(&params->M, &params->MC, &params->MN,
+ params->Q, &params->LDA,
+ params->TAU,
+ params->WORK, &params->LWORK,
+ &rv);
+ return rv;
+}
+static inline fortran_int
+call_gqr(GQR_PARAMS_t<f2c_doublecomplex> *params)
+{
+ fortran_int rv;
+ LAPACK(zungqr)(&params->M, &params->MC, &params->MN,
params->Q, &params->LDA,
params->TAU,
params->WORK, &params->LWORK,
@@ -3167,18 +3295,13 @@ call_@lapack_func@(GQR_PARAMS_t *params)
return rv;
}
-/**end repeat**/
-
-/**begin repeat
- #lapack_func=dorgqr#
- #ftyp=fortran_doublereal#
- */
static inline int
-init_@lapack_func@_common(GQR_PARAMS_t *params,
+init_gqr_common(GQR_PARAMS_t<fortran_doublereal> *params,
fortran_int m,
fortran_int n,
fortran_int mc)
{
+using ftyp = fortran_doublereal;
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *q, *tau, *work;
@@ -3187,15 +3310,15 @@ init_@lapack_func@_common(GQR_PARAMS_t *params,
size_t safe_min_m_n = min_m_n;
size_t safe_m = m;
size_t safe_n = n;
- size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
- size_t q_size = safe_m * safe_mc * sizeof(@ftyp@);
- size_t tau_size = safe_min_m_n * sizeof(@ftyp@);
+ size_t a_size = safe_m * safe_n * sizeof(ftyp);
+ size_t q_size = safe_m * safe_mc * sizeof(ftyp);
+ size_t tau_size = safe_min_m_n * sizeof(ftyp);
fortran_int work_count;
size_t work_size;
fortran_int lda = fortran_int_max(1, m);
- mem_buff = malloc(q_size + tau_size + a_size);
+ mem_buff = (npy_uint8 *)malloc(q_size + tau_size + a_size);
if (!mem_buff)
goto error;
@@ -3209,35 +3332,35 @@ init_@lapack_func@_common(GQR_PARAMS_t *params,
params->MC = mc;
params->MN = min_m_n;
params->A = a;
- params->Q = q;
- params->TAU = tau;
+ params->Q = (ftyp*)q;
+ params->TAU = (ftyp*)tau;
params->LDA = lda;
{
/* compute optimal work size */
- @ftyp@ work_size_query;
+ ftyp work_size_query;
params->WORK = &work_size_query;
params->LWORK = -1;
- if (call_@lapack_func@(params) != 0)
+ if (call_gqr(params) != 0)
goto error;
- work_count = (fortran_int) *(@ftyp@*) params->WORK;
+ work_count = (fortran_int) *(ftyp*) params->WORK;
}
params->LWORK = fortran_int_max(fortran_int_max(1, n), work_count);
- work_size = (size_t) params->LWORK * sizeof(@ftyp@);
+ work_size = (size_t) params->LWORK * sizeof(ftyp);
- mem_buff2 = malloc(work_size);
+ mem_buff2 = (npy_uint8 *)malloc(work_size);
if (!mem_buff2)
goto error;
work = mem_buff2;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
return 1;
error:
@@ -3249,18 +3372,14 @@ init_@lapack_func@_common(GQR_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-/**begin repeat
- #lapack_func=zungqr#
- #ftyp=fortran_doublecomplex#
- */
static inline int
-init_@lapack_func@_common(GQR_PARAMS_t *params,
+init_gqr_common(GQR_PARAMS_t<fortran_doublecomplex> *params,
fortran_int m,
fortran_int n,
fortran_int mc)
{
+using ftyp=fortran_doublecomplex;
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *q, *tau, *work;
@@ -3270,15 +3389,15 @@ init_@lapack_func@_common(GQR_PARAMS_t *params,
size_t safe_m = m;
size_t safe_n = n;
- size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
- size_t q_size = safe_m * safe_mc * sizeof(@ftyp@);
- size_t tau_size = safe_min_m_n * sizeof(@ftyp@);
+ size_t a_size = safe_m * safe_n * sizeof(ftyp);
+ size_t q_size = safe_m * safe_mc * sizeof(ftyp);
+ size_t tau_size = safe_min_m_n * sizeof(ftyp);
fortran_int work_count;
size_t work_size;
fortran_int lda = fortran_int_max(1, m);
- mem_buff = malloc(q_size + tau_size + a_size);
+ mem_buff = (npy_uint8 *)malloc(q_size + tau_size + a_size);
if (!mem_buff)
goto error;
@@ -3292,36 +3411,36 @@ init_@lapack_func@_common(GQR_PARAMS_t *params,
params->MC = mc;
params->MN = min_m_n;
params->A = a;
- params->Q = q;
- params->TAU = tau;
+ params->Q = (ftyp*)q;
+ params->TAU = (ftyp*)tau;
params->LDA = lda;
{
/* compute optimal work size */
- @ftyp@ work_size_query;
+ ftyp work_size_query;
params->WORK = &work_size_query;
params->LWORK = -1;
- if (call_@lapack_func@(params) != 0)
+ if (call_gqr(params) != 0)
goto error;
- work_count = (fortran_int) ((@ftyp@*)params->WORK)->r;
+ work_count = (fortran_int) ((ftyp*)params->WORK)->r;
}
params->LWORK = fortran_int_max(fortran_int_max(1, n),
work_count);
- work_size = (size_t) params->LWORK * sizeof(@ftyp@);
+ work_size = (size_t) params->LWORK * sizeof(ftyp);
- mem_buff2 = malloc(work_size);
+ mem_buff2 = (npy_uint8 *)malloc(work_size);
if (!mem_buff2)
goto error;
work = mem_buff2;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
params->LWORK = work_count;
return 1;
@@ -3334,15 +3453,14 @@ init_@lapack_func@_common(GQR_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-
/* -------------------------------------------------------------------------- */
/* qr (modes - reduced) */
+template<typename typ>
static inline void
dump_gqr_params(const char *name,
- GQR_PARAMS_t *params)
+ GQR_PARAMS_t<typ> *params)
{
TRACE_TXT("\n%s:\n"\
@@ -3368,27 +3486,21 @@ dump_gqr_params(const char *name,
"LWORK", (int)params->LWORK);
}
-/**begin repeat
- #lapack_func=dorgqr,zungqr#
- #ftyp=fortran_doublereal,fortran_doublecomplex#
- */
+template<typename ftyp>
static inline int
-init_@lapack_func@(GQR_PARAMS_t *params,
+init_gqr(GQR_PARAMS_t<ftyp> *params,
fortran_int m,
fortran_int n)
{
- return init_@lapack_func@_common(
+ return init_gqr_common(
params, m, n,
fortran_int_min(m, n));
}
-/**end repeat**/
-/**begin repeat
- #lapack_func=dorgqr,zungqr#
- */
+template<typename typ>
static inline void
-release_@lapack_func@(GQR_PARAMS_t* params)
+release_gqr(GQR_PARAMS_t<typ>* params)
{
/* A and WORK contain allocated blocks */
free(params->Q);
@@ -3396,22 +3508,13 @@ release_@lapack_func@(GQR_PARAMS_t* params)
memset(params, 0, sizeof(*params));
}
-/**end repeat**/
-
-/**begin repeat
- #TYPE=DOUBLE,CDOUBLE#
- #REALTYPE=DOUBLE,DOUBLE#
- #lapack_func=dorgqr,zungqr#
- #typ = npy_double, npy_cdouble#
- #basetyp = npy_double, npy_double#
- #ftyp = fortran_doublereal,fortran_doublecomplex#
- #cmplx = 0, 1#
- */
+template<typename typ>
static void
-@TYPE@_qr_reduced(char **args, npy_intp const *dimensions, npy_intp const *steps,
+qr_reduced(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- GQR_PARAMS_t params;
+using ftyp = fortran_type_t<typ>;
+ GQR_PARAMS_t<ftyp> params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n, m;
@@ -3420,7 +3523,7 @@ static void
m = (fortran_int)dimensions[0];
n = (fortran_int)dimensions[1];
- if (init_@lapack_func@(&params, m, n)) {
+ if (init_gqr(&params, m, n)) {
LINEARIZE_DATA_t a_in, tau_in, q_out;
init_linearize_data(&a_in, n, m, steps[1], steps[0]);
@@ -3429,57 +3532,44 @@ static void
BEGIN_OUTER_LOOP_3
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- linearize_@TYPE@_matrix(params.Q, args[0], &a_in);
- linearize_@TYPE@_matrix(params.TAU, args[1], &tau_in);
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ linearize_matrix((typ*)params.Q, (typ*)args[0], &a_in);
+ linearize_matrix((typ*)params.TAU, (typ*)args[1], &tau_in);
+ not_ok = call_gqr(&params);
if (!not_ok) {
- delinearize_@TYPE@_matrix(args[2], params.Q, &q_out);
+ delinearize_matrix((typ*)args[2], (typ*)params.Q, &q_out);
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[2], &q_out);
+ nan_matrix((typ*)args[2], &q_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_gqr(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
-/**end repeat**/
-
/* -------------------------------------------------------------------------- */
/* qr (modes - complete) */
-/**begin repeat
- #lapack_func=dorgqr,zungqr#
- #ftyp=fortran_doublereal,fortran_doublecomplex#
- */
+template<typename ftyp>
static inline int
-init_@lapack_func@_complete(GQR_PARAMS_t *params,
+init_gqr_complete(GQR_PARAMS_t<ftyp> *params,
fortran_int m,
fortran_int n)
{
- return init_@lapack_func@_common(params, m, n, m);
+ return init_gqr_common(params, m, n, m);
}
-/**end repeat**/
-/**begin repeat
- #TYPE=DOUBLE,CDOUBLE#
- #REALTYPE=DOUBLE,DOUBLE#
- #lapack_func=dorgqr,zungqr#
- #typ = npy_double,npy_cdouble#
- #basetyp = npy_double,npy_double#
- #ftyp = fortran_doublereal,fortran_doublecomplex#
- #cmplx = 0, 1#
- */
+template<typename typ>
static void
-@TYPE@_qr_complete(char **args, npy_intp const *dimensions, npy_intp const *steps,
+qr_complete(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- GQR_PARAMS_t params;
+using ftyp = fortran_type_t<typ>;
+ GQR_PARAMS_t<ftyp> params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n, m;
@@ -3489,7 +3579,7 @@ static void
n = (fortran_int)dimensions[1];
- if (init_@lapack_func@_complete(&params, m, n)) {
+ if (init_gqr_complete(&params, m, n)) {
LINEARIZE_DATA_t a_in, tau_in, q_out;
init_linearize_data(&a_in, n, m, steps[1], steps[0]);
@@ -3498,51 +3588,50 @@ static void
BEGIN_OUTER_LOOP_3
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- linearize_@TYPE@_matrix(params.Q, args[0], &a_in);
- linearize_@TYPE@_matrix(params.TAU, args[1], &tau_in);
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ linearize_matrix((typ*)params.Q, (typ*)args[0], &a_in);
+ linearize_matrix((typ*)params.TAU, (typ*)args[1], &tau_in);
+ not_ok = call_gqr(&params);
if (!not_ok) {
- delinearize_@TYPE@_matrix(args[2], params.Q, &q_out);
+ delinearize_matrix((typ*)args[2], (typ*)params.Q, &q_out);
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[2], &q_out);
+ nan_matrix((typ*)args[2], &q_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_gqr(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
-/**end repeat**/
-
/* -------------------------------------------------------------------------- */
/* least squares */
-typedef struct gelsd_params_struct
+template<typename typ>
+struct GELSD_PARAMS_t
{
fortran_int M;
fortran_int N;
fortran_int NRHS;
- void *A;
+ typ *A;
fortran_int LDA;
- void *B;
+ typ *B;
fortran_int LDB;
- void *S;
- void *RCOND;
+ basetype_t<typ> *S;
+ basetype_t<typ> *RCOND;
fortran_int RANK;
- void *WORK;
+ typ *WORK;
fortran_int LWORK;
- void *RWORK;
- void *IWORK;
-} GELSD_PARAMS_t;
-
+ basetype_t<typ> *RWORK;
+ fortran_int *IWORK;
+};
+template<typename typ>
static inline void
dump_gelsd_params(const char *name,
- GELSD_PARAMS_t *params)
+ GELSD_PARAMS_t<typ> *params)
{
TRACE_TXT("\n%s:\n"\
@@ -3583,18 +3672,27 @@ dump_gelsd_params(const char *name,
"RCOND", params->RCOND);
}
+static inline fortran_int
+call_gelsd(GELSD_PARAMS_t<fortran_real> *params)
+{
+ fortran_int rv;
+ LAPACK(sgelsd)(&params->M, &params->N, &params->NRHS,
+ params->A, &params->LDA,
+ params->B, &params->LDB,
+ params->S,
+ params->RCOND, &params->RANK,
+ params->WORK, &params->LWORK,
+ params->IWORK,
+ &rv);
+ return rv;
+}
-/**begin repeat
- #TYPE=FLOAT,DOUBLE#
- #lapack_func=sgelsd,dgelsd#
- #ftyp=fortran_real,fortran_doublereal#
- */
static inline fortran_int
-call_@lapack_func@(GELSD_PARAMS_t *params)
+call_gelsd(GELSD_PARAMS_t<fortran_doublereal> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->M, &params->N, &params->NRHS,
+ LAPACK(dgelsd)(&params->M, &params->N, &params->NRHS,
params->A, &params->LDA,
params->B, &params->LDB,
params->S,
@@ -3605,11 +3703,14 @@ call_@lapack_func@(GELSD_PARAMS_t *params)
return rv;
}
+
+template<typename ftyp>
static inline int
-init_@lapack_func@(GELSD_PARAMS_t *params,
+init_gelsd(GELSD_PARAMS_t<ftyp> *params,
fortran_int m,
fortran_int n,
- fortran_int nrhs)
+ fortran_int nrhs,
+scalar_trait)
{
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
@@ -3622,9 +3723,9 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
size_t safe_n = n;
size_t safe_nrhs = nrhs;
- size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
- size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@);
- size_t s_size = safe_min_m_n * sizeof(@ftyp@);
+ size_t a_size = safe_m * safe_n * sizeof(ftyp);
+ size_t b_size = safe_max_m_n * safe_nrhs * sizeof(ftyp);
+ size_t s_size = safe_min_m_n * sizeof(ftyp);
fortran_int work_count;
size_t work_size;
@@ -3632,7 +3733,7 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
fortran_int lda = fortran_int_max(1, m);
fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n));
- mem_buff = malloc(a_size + b_size + s_size);
+ mem_buff = (npy_uint8 *)malloc(a_size + b_size + s_size);
if (!mem_buff)
goto error;
@@ -3645,15 +3746,15 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
params->M = m;
params->N = n;
params->NRHS = nrhs;
- params->A = a;
- params->B = b;
- params->S = s;
+ params->A = (ftyp*)a;
+ params->B = (ftyp*)b;
+ params->S = (ftyp*)s;
params->LDA = lda;
params->LDB = ldb;
{
/* compute optimal work size */
- @ftyp@ work_size_query;
+ ftyp work_size_query;
fortran_int iwork_size_query;
params->WORK = &work_size_query;
@@ -3661,25 +3762,25 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
params->RWORK = NULL;
params->LWORK = -1;
- if (call_@lapack_func@(params) != 0)
+ if (call_gelsd(params) != 0)
goto error;
work_count = (fortran_int)work_size_query;
- work_size = (size_t) work_size_query * sizeof(@ftyp@);
+ work_size = (size_t) work_size_query * sizeof(ftyp);
iwork_size = (size_t)iwork_size_query * sizeof(fortran_int);
}
- mem_buff2 = malloc(work_size + iwork_size);
+ mem_buff2 = (npy_uint8 *)malloc(work_size + iwork_size);
if (!mem_buff2)
goto error;
work = mem_buff2;
iwork = work + work_size;
- params->WORK = work;
+ params->WORK = (ftyp*)work;
params->RWORK = NULL;
- params->IWORK = iwork;
+ params->IWORK = (fortran_int*)iwork;
params->LWORK = work_count;
return 1;
@@ -3692,37 +3793,46 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-
-/**begin repeat
- #TYPE=CFLOAT,CDOUBLE#
- #ftyp=fortran_complex,fortran_doublecomplex#
- #frealtyp=fortran_real,fortran_doublereal#
- #typ=COMPLEX_t,DOUBLECOMPLEX_t#
- #lapack_func=cgelsd,zgelsd#
- */
+static inline fortran_int
+call_gelsd(GELSD_PARAMS_t<fortran_complex> *params)
+{
+ fortran_int rv;
+ LAPACK(cgelsd)(&params->M, &params->N, &params->NRHS,
+ params->A, &params->LDA,
+ params->B, &params->LDB,
+ params->S,
+ params->RCOND, &params->RANK,
+ params->WORK, &params->LWORK,
+ params->RWORK, (fortran_int*)params->IWORK,
+ &rv);
+ return rv;
+}
static inline fortran_int
-call_@lapack_func@(GELSD_PARAMS_t *params)
+call_gelsd(GELSD_PARAMS_t<fortran_doublecomplex> *params)
{
fortran_int rv;
- LAPACK(@lapack_func@)(&params->M, &params->N, &params->NRHS,
+ LAPACK(zgelsd)(&params->M, &params->N, &params->NRHS,
params->A, &params->LDA,
params->B, &params->LDB,
params->S,
params->RCOND, &params->RANK,
params->WORK, &params->LWORK,
- params->RWORK, params->IWORK,
+ params->RWORK, (fortran_int*)params->IWORK,
&rv);
return rv;
}
+
+template<typename ftyp>
static inline int
-init_@lapack_func@(GELSD_PARAMS_t *params,
+init_gelsd(GELSD_PARAMS_t<ftyp> *params,
fortran_int m,
fortran_int n,
- fortran_int nrhs)
+ fortran_int nrhs,
+complex_trait)
{
+using frealtyp = basetype_t<ftyp>;
npy_uint8 *mem_buff = NULL;
npy_uint8 *mem_buff2 = NULL;
npy_uint8 *a, *b, *s, *work, *iwork, *rwork;
@@ -3734,16 +3844,16 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
size_t safe_n = n;
size_t safe_nrhs = nrhs;
- size_t a_size = safe_m * safe_n * sizeof(@ftyp@);
- size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@);
- size_t s_size = safe_min_m_n * sizeof(@frealtyp@);
+ size_t a_size = safe_m * safe_n * sizeof(ftyp);
+ size_t b_size = safe_max_m_n * safe_nrhs * sizeof(ftyp);
+ size_t s_size = safe_min_m_n * sizeof(frealtyp);
fortran_int work_count;
size_t work_size, rwork_size, iwork_size;
fortran_int lda = fortran_int_max(1, m);
fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n));
- mem_buff = malloc(a_size + b_size + s_size);
+ mem_buff = (npy_uint8 *)malloc(a_size + b_size + s_size);
if (!mem_buff)
goto error;
@@ -3756,16 +3866,16 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
params->M = m;
params->N = n;
params->NRHS = nrhs;
- params->A = a;
- params->B = b;
- params->S = s;
+ params->A = (ftyp*)a;
+ params->B = (ftyp*)b;
+ params->S = (frealtyp*)s;
params->LDA = lda;
params->LDB = ldb;
{
/* compute optimal work size */
- @ftyp@ work_size_query;
- @frealtyp@ rwork_size_query;
+ ftyp work_size_query;
+ frealtyp rwork_size_query;
fortran_int iwork_size_query;
params->WORK = &work_size_query;
@@ -3773,17 +3883,17 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
params->RWORK = &rwork_size_query;
params->LWORK = -1;
- if (call_@lapack_func@(params) != 0)
+ if (call_gelsd(params) != 0)
goto error;
work_count = (fortran_int)work_size_query.r;
- work_size = (size_t )work_size_query.r * sizeof(@ftyp@);
- rwork_size = (size_t)rwork_size_query * sizeof(@frealtyp@);
+ work_size = (size_t )work_size_query.r * sizeof(ftyp);
+ rwork_size = (size_t)rwork_size_query * sizeof(frealtyp);
iwork_size = (size_t)iwork_size_query * sizeof(fortran_int);
}
- mem_buff2 = malloc(work_size + rwork_size + iwork_size);
+ mem_buff2 = (npy_uint8 *)malloc(work_size + rwork_size + iwork_size);
if (!mem_buff2)
goto error;
@@ -3791,9 +3901,9 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
rwork = work + work_size;
iwork = rwork + rwork_size;
- params->WORK = work;
- params->RWORK = rwork;
- params->IWORK = iwork;
+ params->WORK = (ftyp*)work;
+ params->RWORK = (frealtyp*)rwork;
+ params->IWORK = (fortran_int*)iwork;
params->LWORK = work_count;
return 1;
@@ -3806,22 +3916,9 @@ init_@lapack_func@(GELSD_PARAMS_t *params,
return 0;
}
-/**end repeat**/
-
-
-/**begin repeat
- #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
- #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE#
- #lapack_func=sgelsd,dgelsd,cgelsd,zgelsd#
- #dot_func=sdot,ddot,cdotc,zdotc#
- #typ = npy_float, npy_double, npy_cfloat, npy_cdouble#
- #basetyp = npy_float, npy_double, npy_float, npy_double#
- #ftyp = fortran_real, fortran_doublereal,
- fortran_complex, fortran_doublecomplex#
- #cmplx = 0, 0, 1, 1#
- */
+template<typename ftyp>
static inline void
-release_@lapack_func@(GELSD_PARAMS_t* params)
+release_gelsd(GELSD_PARAMS_t<ftyp>* params)
{
/* A and WORK contain allocated blocks */
free(params->A);
@@ -3830,26 +3927,38 @@ release_@lapack_func@(GELSD_PARAMS_t* params)
}
/** Compute the squared l2 norm of a contiguous vector */
-static @basetyp@
-@TYPE@_abs2(@typ@ *p, npy_intp n) {
+template<typename typ>
+static basetype_t<typ>
+abs2(typ *p, npy_intp n, scalar_trait) {
npy_intp i;
- @basetyp@ res = 0;
+ basetype_t<typ> res = 0;
for (i = 0; i < n; i++) {
- @typ@ el = p[i];
-#if @cmplx@
- res += el.real*el.real + el.imag*el.imag;
-#else
+ typ el = p[i];
res += el*el;
-#endif
+ }
+ return res;
+}
+template<typename typ>
+static basetype_t<typ>
+abs2(typ *p, npy_intp n, complex_trait) {
+ npy_intp i;
+ basetype_t<typ> res = 0;
+ for (i = 0; i < n; i++) {
+ typ el = p[i];
+ res += el.real*el.real + el.imag*el.imag;
}
return res;
}
+
+template<typename typ>
static void
-@TYPE@_lstsq(char **args, npy_intp const *dimensions, npy_intp const *steps,
+lstsq(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
{
- GELSD_PARAMS_t params;
+using ftyp = fortran_type_t<typ>;
+using basetyp = basetype_t<typ>;
+ GELSD_PARAMS_t<ftyp> params;
int error_occurred = get_fp_invalid_and_clear();
fortran_int n, m, nrhs;
fortran_int excess;
@@ -3861,7 +3970,7 @@ static void
nrhs = (fortran_int)dimensions[2];
excess = m - n;
- if (init_@lapack_func@(&params, m, n, nrhs)) {
+ if (init_gelsd(&params, m, n, nrhs, dispatch_scalar<ftyp>{})) {
LINEARIZE_DATA_t a_in, b_in, x_out, s_out, r_out;
init_linearize_data(&a_in, n, m, steps[1], steps[0]);
@@ -3872,52 +3981,51 @@ static void
BEGIN_OUTER_LOOP_7
int not_ok;
- linearize_@TYPE@_matrix(params.A, args[0], &a_in);
- linearize_@TYPE@_matrix(params.B, args[1], &b_in);
- params.RCOND = args[2];
- not_ok = call_@lapack_func@(&params);
+ linearize_matrix((typ*)params.A, (typ*)args[0], &a_in);
+ linearize_matrix((typ*)params.B, (typ*)args[1], &b_in);
+ params.RCOND = (basetyp*)args[2];
+ not_ok = call_gelsd(&params);
if (!not_ok) {
- delinearize_@TYPE@_matrix(args[3], params.B, &x_out);
+ delinearize_matrix((typ*)args[3], (typ*)params.B, &x_out);
*(npy_int*) args[5] = params.RANK;
- delinearize_@REALTYPE@_matrix(args[6], params.S, &s_out);
+ delinearize_matrix((basetyp*)args[6], (basetyp*)params.S, &s_out);
/* Note that linalg.lstsq discards this when excess == 0 */
if (excess >= 0 && params.RANK == n) {
/* Compute the residuals as the square sum of each column */
int i;
char *resid = args[4];
- @ftyp@ *components = (@ftyp@ *)params.B + n;
+ ftyp *components = (ftyp *)params.B + n;
for (i = 0; i < nrhs; i++) {
- @ftyp@ *vector = components + i*m;
+ ftyp *vector = components + i*m;
/* Numpy and fortran floating types are the same size,
* so this cast is safe */
- @basetyp@ abs2 = @TYPE@_abs2((@typ@ *)vector, excess);
+ basetyp abs = abs2((typ *)vector, excess,
+dispatch_scalar<typ>{});
memcpy(
resid + i*r_out.column_strides,
- &abs2, sizeof(abs2));
+ &abs, sizeof(abs));
}
}
else {
/* Note that this is always discarded by linalg.lstsq */
- nan_@REALTYPE@_matrix(args[4], &r_out);
+ nan_matrix((basetyp*)args[4], &r_out);
}
} else {
error_occurred = 1;
- nan_@TYPE@_matrix(args[3], &x_out);
- nan_@REALTYPE@_matrix(args[4], &r_out);
+ nan_matrix((typ*)args[3], &x_out);
+ nan_matrix((basetyp*)args[4], &r_out);
*(npy_int*) args[5] = -1;
- nan_@REALTYPE@_matrix(args[6], &s_out);
+ nan_matrix((basetyp*)args[6], &s_out);
}
END_OUTER_LOOP
- release_@lapack_func@(&params);
+ release_gelsd(&params);
}
set_fp_invalid_or_clear(error_occurred);
}
-/**end repeat**/
-
#pragma GCC diagnostic pop
/* -------------------------------------------------------------------------- */
@@ -3962,6 +4070,22 @@ static void *array_of_nulls[] = {
CFLOAT_ ## NAME, \
CDOUBLE_ ## NAME \
}
+#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX_(NAME) \
+ static PyUFuncGenericFunction \
+ FUNC_ARRAY_NAME(NAME)[] = { \
+ NAME<npy_float, npy_float>, \
+ NAME<npy_double, npy_double>, \
+ NAME<npy_cfloat, npy_float>, \
+ NAME<npy_cdouble, npy_double> \
+ }
+#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(NAME) \
+ static PyUFuncGenericFunction \
+ FUNC_ARRAY_NAME(NAME)[] = { \
+ NAME<npy_float>, \
+ NAME<npy_double>, \
+ NAME<npy_cfloat>, \
+ NAME<npy_cdouble> \
+ }
/* There are problems with eig in complex single precision.
* That kernel is disabled
@@ -3969,9 +4093,9 @@ static void *array_of_nulls[] = {
#define GUFUNC_FUNC_ARRAY_EIG(NAME) \
static PyUFuncGenericFunction \
FUNC_ARRAY_NAME(NAME)[] = { \
- FLOAT_ ## NAME, \
- DOUBLE_ ## NAME, \
- CDOUBLE_ ## NAME \
+ NAME<fortran_complex,fortran_real>, \
+ NAME<fortran_doublecomplex,fortran_doublereal>, \
+ NAME<fortran_doublecomplex,fortran_doublecomplex> \
}
/* The single precision functions are not used at all,
@@ -3984,25 +4108,31 @@ static void *array_of_nulls[] = {
DOUBLE_ ## NAME, \
CDOUBLE_ ## NAME \
}
+#define GUFUNC_FUNC_ARRAY_QR__(NAME) \
+ static PyUFuncGenericFunction \
+ FUNC_ARRAY_NAME(NAME)[] = { \
+ NAME<npy_double>, \
+ NAME<npy_cdouble> \
+ }
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A);
-GUFUNC_FUNC_ARRAY_QR(qr_r_raw);
-GUFUNC_FUNC_ARRAY_QR(qr_reduced);
-GUFUNC_FUNC_ARRAY_QR(qr_complete);
-GUFUNC_FUNC_ARRAY_REAL_COMPLEX(lstsq);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX_(slogdet);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX_(det);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eighlo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eighup);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eigvalshlo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eigvalshup);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(solve);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(solve1);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(inv);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(cholesky_lo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(svd_N);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(svd_S);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(svd_A);
+GUFUNC_FUNC_ARRAY_QR__(qr_r_raw);
+GUFUNC_FUNC_ARRAY_QR__(qr_reduced);
+GUFUNC_FUNC_ARRAY_QR__(qr_complete);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(lstsq);
GUFUNC_FUNC_ARRAY_EIG(eig);
GUFUNC_FUNC_ARRAY_EIG(eigvals);
@@ -4095,9 +4225,9 @@ static char lstsq_types[] = {
};
typedef struct gufunc_descriptor_struct {
- char *name;
- char *signature;
- char *doc;
+ const char *name;
+ const char *signature;
+ const char *doc;
int ntypes;
int nin;
int nout;
@@ -4402,7 +4532,6 @@ PyMODINIT_FUNC PyInit__umath_linalg(void)
PyObject *d;
PyObject *version;
- init_constants();
m = PyModule_Create(&moduledef);
if (m == NULL) {
return NULL;