diff options
| author | Bhavuk kalra <bhavukkalra1786@gmail.com> | 2022-04-03 23:55:56 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-04-03 23:55:56 +0530 |
| commit | 6aa2fcaecf8e56b61143f34269d04fc55f8cabf1 (patch) | |
| tree | 00276e2b56888f48a6e2a6756cd4af52fef46862 | |
| parent | 65f4baf28a5f8314ea7e3b9059f0f844efa129a0 (diff) | |
| parent | 37bbfc9012ea46427143c0aa865d4cc7d8c94b9a (diff) | |
| download | numpy-6aa2fcaecf8e56b61143f34269d04fc55f8cabf1.tar.gz | |
Merge branch 'numpy:main' into doc-improveGenShuffleDoc
28 files changed, 2301 insertions, 1180 deletions
diff --git a/.gitpod.yml b/.gitpod.yml index c46752f10..096483fd6 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -19,6 +19,8 @@ tasks: echo "📖 Building docs 📖 " cd doc make html + echo "Installing dependencies for documentation Live-Preview" + pip install esbonio echo "✨ Pre-build complete! You can close this terminal ✨ " # -------------------------------------------------------- diff --git a/doc/release/upcoming_changes/21154.improvement.rst b/doc/release/upcoming_changes/21154.improvement.rst new file mode 100644 index 000000000..38630b47b --- /dev/null +++ b/doc/release/upcoming_changes/21154.improvement.rst @@ -0,0 +1,7 @@ +Math C library feature detection now uses correct signatures +------------------------------------------------------------ +Compiling is preceded by a detection phase to determine whether the +underlying libc supports certain math operations. Previously this code +did not respect the proper signatures. Fixing this enables compilation +for the ``wasm-ld`` backend (compilation for web assembly) and reduces +the number of warnings. diff --git a/doc/source/dev/index.rst b/doc/source/dev/index.rst index a8c969267..46ff3a5b2 100644 --- a/doc/source/dev/index.rst +++ b/doc/source/dev/index.rst @@ -27,8 +27,10 @@ the `numpy-discussion mailing list <https://mail.python.org/mailman/listinfo/num or on `GitHub <https://github.com/numpy/numpy>`__ (open an issue or comment on a relevant issue). These are our preferred communication channels (open source is open by nature!), however if you prefer to discuss in private first, please reach out to -our community coordinators at `numpy-team@googlegroups.com` or `numpy-team.slack.com` -(send an email to `numpy-team@googlegroups.com` for an invite the first time). +our community coordinators at `numpy-team@googlegroups.com +<mailto://numpy-team@googlegroups.com>`_ or `numpy-team.slack.com +<https://numpy-team.slack.com>`__ (send an email to `numpy-team@googlegroups.com`_ for an +invite the first time). Development process - summary ============================= @@ -53,7 +55,7 @@ Here's the short summary, complete TOC links are below: git remote add upstream https://github.com/numpy/numpy.git - * Now, `git remote -v` will show two remote repositories named: + * Now, ``git remote -v`` will show two remote repositories named: - ``upstream``, which refers to the ``numpy`` repository - ``origin``, which refers to your personal fork diff --git a/environment.yml b/environment.yml index f2be74f62..922fb55dc 100644 --- a/environment.yml +++ b/environment.yml @@ -29,7 +29,8 @@ dependencies: - pandas - matplotlib - pydata-sphinx-theme=0.7.2 - - breathe + # NOTE: breathe 4.33.0 collides with sphinx.ext.graphviz + - breathe!=4.33.0 # For linting - pycodestyle=2.7.0 - gitpython diff --git a/numpy/core/feature_detection_locale.h b/numpy/core/feature_detection_locale.h new file mode 100644 index 000000000..0af1d6e7e --- /dev/null +++ b/numpy/core/feature_detection_locale.h @@ -0,0 +1 @@ +long double strtold_l(const char*, char**, locale_t); diff --git a/numpy/core/feature_detection_math.h b/numpy/core/feature_detection_math.h new file mode 100644 index 000000000..27ad7bcf0 --- /dev/null +++ b/numpy/core/feature_detection_math.h @@ -0,0 +1,107 @@ +double expm1(double); +double log1p(double); +double acosh(double); +double asinh(double); +double atanh(double); +double rint(double); +double trunc(double); +double exp2(double); +double log2(double); +double hypot(double, double); +double atan2(double, double); +double pow(double, double); +double copysign(double, double); +double nextafter(double, double); +long long strtoll(const char*, char**, int); +unsigned long long strtoull(const char*, char**, int); +double cbrt(double); +long double sinl(long double); +long double cosl(long double); +long double tanl(long double); +long double sinhl(long double); +long double coshl(long double); +long double tanhl(long double); +long double fabsl(long double); +long double floorl(long double); +long double ceill(long double); +long double rintl(long double); +long double truncl(long double); +long double sqrtl(long double); +long double log10l(long double); +long double logl(long double); +long double log1pl(long double); +long double expl(long double); +long double expm1l(long double); +long double asinl(long double); +long double acosl(long double); +long double atanl(long double); +long double asinhl(long double); +long double acoshl(long double); +long double atanhl(long double); +long double hypotl(long double, long double); +long double atan2l(long double, long double); +long double powl(long double, long double); +long double fmodl(long double, long double); +long double modfl(long double, long double*); +long double frexpl(long double, int*); +long double ldexpl(long double, int); +long double exp2l(long double); +long double log2l(long double); +long double copysignl(long double, long double); +long double nextafterl(long double, long double); +long double cbrtl(long double); +float sinf(float); +float cosf(float); +float tanf(float); +float sinhf(float); +float coshf(float); +float tanhf(float); +float fabsf(float); +float floorf(float); +float ceilf(float); +float rintf(float); +float truncf(float); +float sqrtf(float); +float log10f(float); +float logf(float); +float log1pf(float); +float expf(float); +float expm1f(float); +float asinf(float); +float acosf(float); +float atanf(float); +float asinhf(float); +float acoshf(float); +float atanhf(float); +float hypotf(float, float); +float atan2f(float, float); +float powf(float, float); +float fmodf(float, float); +float modff(float, float*); +float frexpf(float, int*); +float ldexpf(float, int); +float exp2f(float); +float log2f(float); +float copysignf(float, float); +float nextafterf(float, float); +float cbrtf(float); +double sin(double); +double cos(double); +double tan(double); +double sinh(double); +double cosh(double); +double tanh(double); +double fabs(double); +double floor(double); +double ceil(double); +double sqrt(double); +double log10(double); +double log(double); +double exp(double); +double asin(double); +double acos(double); +double atan(double); +double fmod(double, double); +double modf(double, double*); +double frexp(double, int*); +double ldexp(double, int); diff --git a/numpy/core/feature_detection_misc.h b/numpy/core/feature_detection_misc.h new file mode 100644 index 000000000..f58cf4b62 --- /dev/null +++ b/numpy/core/feature_detection_misc.h @@ -0,0 +1,4 @@ +#include <stddef.h> + +int backtrace(void **, int); +int madvise(void *, size_t, int); diff --git a/numpy/core/feature_detection_stdio.h b/numpy/core/feature_detection_stdio.h new file mode 100644 index 000000000..bc14d16d0 --- /dev/null +++ b/numpy/core/feature_detection_stdio.h @@ -0,0 +1,6 @@ +#include <stdio.h> +#include <fcntl.h> + +off_t ftello(FILE *stream); +int fseeko(FILE *stream, off_t offset, int whence); +int fallocate(int, int, off_t, off_t); diff --git a/numpy/core/setup.py b/numpy/core/setup.py index c4222d7c0..5b946d0d4 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -129,32 +129,48 @@ def win32_checks(deflist): deflist.append('FORCE_NO_LONG_DOUBLE_FORMATTING') def check_math_capabilities(config, ext, moredefs, mathlibs): - def check_func(func_name): - return config.check_func(func_name, libraries=mathlibs, - decl=True, call=True) - - def check_funcs_once(funcs_name): - decl = dict([(f, True) for f in funcs_name]) - st = config.check_funcs_once(funcs_name, libraries=mathlibs, - decl=decl, call=decl) + def check_func( + func_name, + decl=False, + headers=["feature_detection_math.h"], + ): + return config.check_func( + func_name, + libraries=mathlibs, + decl=decl, + call=True, + call_args=FUNC_CALL_ARGS[func_name], + headers=headers, + ) + + def check_funcs_once(funcs_name, headers=["feature_detection_math.h"]): + call = dict([(f, True) for f in funcs_name]) + call_args = dict([(f, FUNC_CALL_ARGS[f]) for f in funcs_name]) + st = config.check_funcs_once( + funcs_name, + libraries=mathlibs, + decl=False, + call=call, + call_args=call_args, + headers=headers, + ) if st: moredefs.extend([(fname2def(f), 1) for f in funcs_name]) return st - def check_funcs(funcs_name): + def check_funcs(funcs_name, headers=["feature_detection_math.h"]): # Use check_funcs_once first, and if it does not work, test func per # func. Return success only if all the functions are available - if not check_funcs_once(funcs_name): + if not check_funcs_once(funcs_name, headers=headers): # Global check failed, check func per func for f in funcs_name: - if check_func(f): + if check_func(f, headers=headers): moredefs.append((fname2def(f), 1)) return 0 else: return 1 #use_msvc = config.check_decl("_MSC_VER") - if not check_funcs_once(MANDATORY_FUNCS): raise SystemError("One of the required function to build numpy is not" " available (the list is %s)." % str(MANDATORY_FUNCS)) @@ -169,15 +185,34 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): for f in OPTIONAL_STDFUNCS_MAYBE: if config.check_decl(fname2def(f), headers=["Python.h", "math.h"]): - OPTIONAL_STDFUNCS.remove(f) + if f in OPTIONAL_STDFUNCS: + OPTIONAL_STDFUNCS.remove(f) + else: + OPTIONAL_FILE_FUNCS.remove(f) + check_funcs(OPTIONAL_STDFUNCS) + check_funcs(OPTIONAL_FILE_FUNCS, headers=["feature_detection_stdio.h"]) + check_funcs(OPTIONAL_MISC_FUNCS, headers=["feature_detection_misc.h"]) + + for h in OPTIONAL_HEADERS: if config.check_func("", decl=False, call=False, headers=[h]): h = h.replace(".", "_").replace(os.path.sep, "_") moredefs.append((fname2def(h), 1)) + # Try with both "locale.h" and "xlocale.h" + locale_headers = [ + "stdlib.h", + "xlocale.h", + "feature_detection_locale.h", + ] + if not check_funcs(OPTIONAL_LOCALE_FUNCS, headers=locale_headers): + # It didn't work with xlocale.h, maybe it will work with locale.h? + locale_headers[1] = "locale.h" + check_funcs(OPTIONAL_LOCALE_FUNCS, headers=locale_headers) + for tup in OPTIONAL_INTRINSICS: headers = None if len(tup) == 2: @@ -398,20 +433,28 @@ def check_types(config_cmd, ext, build_dir): def check_mathlib(config_cmd): # Testing the C math library mathlibs = [] - mathlibs_choices = [[], ['m'], ['cpml']] - mathlib = os.environ.get('MATHLIB') + mathlibs_choices = [[], ["m"], ["cpml"]] + mathlib = os.environ.get("MATHLIB") if mathlib: - mathlibs_choices.insert(0, mathlib.split(',')) + mathlibs_choices.insert(0, mathlib.split(",")) for libs in mathlibs_choices: - if config_cmd.check_func("exp", libraries=libs, decl=True, call=True): + if config_cmd.check_func( + "log", + libraries=libs, + call_args="0", + decl="double log(double);", + call=True + ): mathlibs = libs break else: raise RuntimeError( "math library missing; rerun setup.py after setting the " - "MATHLIB env variable") + "MATHLIB env variable" + ) return mathlibs + def visibility_define(config): """Return the define value to use for NPY_VISIBILITY_HIDDEN (may be empty string).""" diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 20d44f4ec..1143872b1 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -1,8 +1,9 @@ # Code common to build tools -import sys -import warnings import copy +import pathlib +import sys import textwrap +import warnings from numpy.distutils.misc_util import mingw32 @@ -91,6 +92,32 @@ def check_api_version(apiversion, codegen_dir): warnings.warn(msg % (apiversion, curapi_hash, apiversion, api_hash, __file__), MismatchCAPIWarning, stacklevel=2) + + +FUNC_CALL_ARGS = {} + +def set_sig(sig): + prefix, _, args = sig.partition("(") + args = args.rpartition(")")[0] + funcname = prefix.rpartition(" ")[-1] + args = [arg.strip() for arg in args.split(",")] + FUNC_CALL_ARGS[funcname] = ", ".join("(%s) 0" % arg for arg in args) + + +for file in [ + "feature_detection_locale.h", + "feature_detection_math.h", + "feature_detection_misc.h", + "feature_detection_stdio.h", +]: + with open(pathlib.Path(__file__).parent / file) as f: + for line in f: + if line.startswith("#"): + continue + if not line.strip(): + continue + set_sig(line) + # Mandatory functions: if not found, fail the build MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", "floor", "ceil", "sqrt", "log10", "log", "exp", "asin", @@ -100,9 +127,11 @@ MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", # replacement implementation. Note that some of these are C99 functions. OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh", "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow", - "copysign", "nextafter", "ftello", "fseeko", - "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate", - "backtrace", "madvise"] + "copysign", "nextafter", "strtoll", "strtoull", "cbrt"] + +OPTIONAL_LOCALE_FUNCS = ["strtold_l"] +OPTIONAL_FILE_FUNCS = ["ftello", "fseeko", "fallocate"] +OPTIONAL_MISC_FUNCS = ["backtrace", "madvise"] OPTIONAL_HEADERS = [ diff --git a/numpy/core/src/multiarray/textreading/tokenize.cpp b/numpy/core/src/multiarray/textreading/tokenize.cpp index b6d9f882b..b09fc3356 100644 --- a/numpy/core/src/multiarray/textreading/tokenize.cpp +++ b/numpy/core/src/multiarray/textreading/tokenize.cpp @@ -40,7 +40,7 @@ template <typename UCS> -static NPY_INLINE int +static inline int copy_to_field_buffer(tokenizer_state *ts, const UCS *chunk_start, const UCS *chunk_end) { @@ -73,7 +73,7 @@ copy_to_field_buffer(tokenizer_state *ts, } -static NPY_INLINE int +static inline int add_field(tokenizer_state *ts) { /* The previous field is done, advance to keep a NUL byte at the end */ @@ -109,7 +109,7 @@ add_field(tokenizer_state *ts) template <typename UCS> -static NPY_INLINE int +static inline int tokenizer_core(tokenizer_state *ts, parser_config *const config) { UCS *pos = (UCS *)ts->pos; diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h index 7ca0c5ba0..a474b3de3 100644 --- a/numpy/core/src/npymath/npy_math_private.h +++ b/numpy/core/src/npymath/npy_math_private.h @@ -514,7 +514,7 @@ typedef union { typedef union { npy_cdouble npy_z; #ifdef __cplusplus - std::complex<double> c99z; + std::complex<double> c99_z; #else complex double c99_z; #endif @@ -523,7 +523,7 @@ typedef union { typedef union { npy_cfloat npy_z; #ifdef __cplusplus - std::complex<float> c99z; + std::complex<float> c99_z; #else complex float c99_z; #endif diff --git a/numpy/core/src/npysort/binsearch.cpp b/numpy/core/src/npysort/binsearch.cpp index 8dd72c094..98d305910 100644 --- a/numpy/core/src/npysort/binsearch.cpp +++ b/numpy/core/src/npysort/binsearch.cpp @@ -344,7 +344,7 @@ constexpr std::array<typename binsearch_t<arg>::value_type, binsearch_t<arg>::map; template <arg_t arg> -static NPY_INLINE typename binsearch_t<arg>::function_type +static inline typename binsearch_t<arg>::function_type _get_binsearch_func(PyArray_Descr *dtype, NPY_SEARCHSIDE side) { using binsearch = binsearch_t<arg>; diff --git a/numpy/core/src/npysort/radixsort.cpp b/numpy/core/src/npysort/radixsort.cpp index 5393869ee..0e1a41c69 100644 --- a/numpy/core/src/npysort/radixsort.cpp +++ b/numpy/core/src/npysort/radixsort.cpp @@ -4,7 +4,7 @@ #include "npysort_common.h" #include "../common/numpy_tag.h" -#include <stdlib.h> +#include <cstdlib> #include <type_traits> /* diff --git a/numpy/core/src/npysort/selection.cpp b/numpy/core/src/npysort/selection.cpp index 7fd04a660..ebe188b71 100644 --- a/numpy/core/src/npysort/selection.cpp +++ b/numpy/core/src/npysort/selection.cpp @@ -39,7 +39,7 @@ introselect_(type *v, npy_intp *tosort, npy_intp num, npy_intp kth, ***************************************************************************** */ -static NPY_INLINE void +static inline void store_pivot(npy_intp pivot, npy_intp kth, npy_intp *pivots, npy_intp *npiv) { if (pivots == NULL) { @@ -104,7 +104,7 @@ inexact() * for efficient partitioning, see unguarded_partition */ template <typename Tag, bool arg, typename type> -static NPY_INLINE void +static inline void median3_swap_(type *v, npy_intp *tosort, npy_intp low, npy_intp mid, npy_intp high) { @@ -170,7 +170,7 @@ median5_(type *v, npy_intp *tosort) * lower-than-pivot [x x x x] larger-than-pivot */ template <typename Tag, bool arg, typename type> -static NPY_INLINE void +static inline void unguarded_partition_(type *v, npy_intp *tosort, const type pivot, npy_intp *ll, npy_intp *hh) { @@ -442,7 +442,7 @@ struct partition_t { }; constexpr std::array<arg_map, partition_t::taglist::size> partition_t::map; -static NPY_INLINE PyArray_PartitionFunc * +static inline PyArray_PartitionFunc * _get_partition_func(int type, NPY_SELECTKIND which) { npy_intp i; @@ -459,7 +459,7 @@ _get_partition_func(int type, NPY_SELECTKIND which) return NULL; } -static NPY_INLINE PyArray_ArgPartitionFunc * +static inline PyArray_ArgPartitionFunc * _get_argpartition_func(int type, NPY_SELECTKIND which) { npy_intp i; diff --git a/numpy/core/src/npysort/timsort.cpp b/numpy/core/src/npysort/timsort.cpp index 8bcd90061..27294af0c 100644 --- a/numpy/core/src/npysort/timsort.cpp +++ b/numpy/core/src/npysort/timsort.cpp @@ -67,7 +67,7 @@ typedef struct { } buffer_intp; /* buffer method */ -static NPY_INLINE int +static inline int resize_buffer_intp(buffer_intp *buffer, npy_intp new_size) { if (new_size <= buffer->size) { @@ -105,7 +105,7 @@ struct buffer_ { }; template <typename Tag> -static NPY_INLINE int +static inline int resize_buffer_(buffer_<Tag> *buffer, npy_intp new_size) { using type = typename Tag::type; @@ -978,7 +978,7 @@ struct string_buffer_ { }; template <typename Tag> -static NPY_INLINE int +static inline int resize_buffer_(string_buffer_<Tag> *buffer, npy_intp new_size) { using type = typename Tag::type; @@ -1873,7 +1873,7 @@ typedef struct { size_t len; } buffer_char; -static NPY_INLINE int +static inline int resize_buffer_char(buffer_char *buffer, npy_intp new_size) { if (new_size <= buffer->size) { diff --git a/numpy/core/src/npysort/x86-qsort.dispatch.cpp b/numpy/core/src/npysort/x86-qsort.dispatch.cpp index 4b01e3528..66261ad0a 100644 --- a/numpy/core/src/npysort/x86-qsort.dispatch.cpp +++ b/numpy/core/src/npysort/x86-qsort.dispatch.cpp @@ -72,7 +72,7 @@ heapsort_(type *start, npy_intp n); _mm256_or_si256(_mm256_slli_epi64((x), (k)), \ _mm256_srli_epi64((x), 64 - (k))) -static NPY_INLINE __m256i +static inline __m256i vnext(__m256i *s0, __m256i *s1) { *s1 = _mm256_xor_si256(*s0, *s1); /* modify vectors s1 and s0 */ @@ -83,7 +83,7 @@ vnext(__m256i *s0, __m256i *s1) } /* transform random numbers to the range between 0 and bound - 1 */ -static NPY_INLINE __m256i +static inline __m256i rnd_epu32(__m256i rnd_vec, __m256i bound) { __m256i even = _mm256_srli_epi64(_mm256_mul_epu32(rnd_vec, bound), 32); @@ -282,7 +282,7 @@ COEX(mm_t &a, mm_t &b) } template <typename vtype, typename zmm_t = typename vtype::zmm_t> -static NPY_INLINE zmm_t +static inline zmm_t cmp_merge(zmm_t in1, zmm_t in2, __mmask16 mask) { zmm_t min = vtype::min(in2, in1); @@ -295,7 +295,7 @@ cmp_merge(zmm_t in1, zmm_t in2, __mmask16 mask) * https://en.wikipedia.org/wiki/Bitonic_sorter#/media/File:BitonicSort.svg */ template <typename vtype, typename zmm_t = typename vtype::zmm_t> -static NPY_INLINE zmm_t +static inline zmm_t sort_zmm(zmm_t zmm) { zmm = cmp_merge<vtype>(zmm, vtype::template shuffle<SHUFFLE_MASK(2, 3, 0, 1)>(zmm), @@ -323,7 +323,7 @@ sort_zmm(zmm_t zmm) // Assumes zmm is bitonic and performs a recursive half cleaner template <typename vtype, typename zmm_t = typename vtype::zmm_t> -static NPY_INLINE zmm_t +static inline zmm_t bitonic_merge_zmm(zmm_t zmm) { // 1) half_cleaner[16]: compare 1-9, 2-10, 3-11 etc .. @@ -343,7 +343,7 @@ bitonic_merge_zmm(zmm_t zmm) // Assumes zmm1 and zmm2 are sorted and performs a recursive half cleaner template <typename vtype, typename zmm_t = typename vtype::zmm_t> -static NPY_INLINE void +static inline void bitonic_merge_two_zmm(zmm_t *zmm1, zmm_t *zmm2) { // 1) First step of a merging network: coex of zmm1 and zmm2 reversed @@ -358,7 +358,7 @@ bitonic_merge_two_zmm(zmm_t *zmm1, zmm_t *zmm2) // Assumes [zmm0, zmm1] and [zmm2, zmm3] are sorted and performs a recursive // half cleaner template <typename vtype, typename zmm_t = typename vtype::zmm_t> -static NPY_INLINE void +static inline void bitonic_merge_four_zmm(zmm_t *zmm) { zmm_t zmm2r = vtype::permutexvar(_mm512_set_epi32(NETWORK5), zmm[2]); @@ -380,7 +380,7 @@ bitonic_merge_four_zmm(zmm_t *zmm) } template <typename vtype, typename zmm_t = typename vtype::zmm_t> -static NPY_INLINE void +static inline void bitonic_merge_eight_zmm(zmm_t *zmm) { zmm_t zmm4r = vtype::permutexvar(_mm512_set_epi32(NETWORK5), zmm[4]); @@ -418,7 +418,7 @@ bitonic_merge_eight_zmm(zmm_t *zmm) } template <typename vtype, typename type_t> -static NPY_INLINE void +static inline void sort_16(type_t *arr, npy_int N) { __mmask16 load_mask = (0x0001 << N) - 0x0001; @@ -428,7 +428,7 @@ sort_16(type_t *arr, npy_int N) } template <typename vtype, typename type_t> -static NPY_INLINE void +static inline void sort_32(type_t *arr, npy_int N) { if (N <= 16) { @@ -447,7 +447,7 @@ sort_32(type_t *arr, npy_int N) } template <typename vtype, typename type_t> -static NPY_INLINE void +static inline void sort_64(type_t *arr, npy_int N) { if (N <= 32) { @@ -482,7 +482,7 @@ sort_64(type_t *arr, npy_int N) } template <typename vtype, typename type_t> -static NPY_INLINE void +static inline void sort_128(type_t *arr, npy_int N) { if (N <= 64) { @@ -545,7 +545,7 @@ sort_128(type_t *arr, npy_int N) } template <typename type_t> -static NPY_INLINE void +static inline void swap(type_t *arr, npy_intp ii, npy_intp jj) { type_t temp = arr[ii]; @@ -555,7 +555,7 @@ swap(type_t *arr, npy_intp ii, npy_intp jj) // Median of 3 strategy // template<typename type_t> -// static NPY_INLINE +// static inline // npy_intp get_pivot_index(type_t *arr, const npy_intp left, const npy_intp // right) { // return (rand() % (right + 1 - left)) + left; @@ -574,7 +574,7 @@ swap(type_t *arr, npy_intp ii, npy_intp jj) */ template <typename vtype, typename type_t> -static NPY_INLINE type_t +static inline type_t get_pivot(type_t *arr, const npy_intp left, const npy_intp right) { /* seeds for vectorized random number generator */ @@ -641,7 +641,7 @@ get_pivot(type_t *arr, const npy_intp left, const npy_intp right) * last element that is less than equal to the pivot. */ template <typename vtype, typename type_t, typename zmm_t> -static NPY_INLINE npy_int +static inline npy_int partition_vec(type_t *arr, npy_intp left, npy_intp right, const zmm_t curr_vec, const zmm_t pivot_vec, zmm_t *smallest_vec, zmm_t *biggest_vec) { @@ -661,7 +661,7 @@ partition_vec(type_t *arr, npy_intp left, npy_intp right, const zmm_t curr_vec, * last element that is less than equal to the pivot. */ template <typename vtype, typename type_t> -static NPY_INLINE npy_intp +static inline npy_intp partition_avx512(type_t *arr, npy_intp left, npy_intp right, type_t pivot, type_t *smallest, type_t *biggest) { @@ -742,7 +742,7 @@ partition_avx512(type_t *arr, npy_intp left, npy_intp right, type_t pivot, } template <typename vtype, typename type_t> -static NPY_INLINE void +static inline void qsort_(type_t *arr, npy_intp left, npy_intp right, npy_int max_iters) { /* @@ -771,7 +771,7 @@ qsort_(type_t *arr, npy_intp left, npy_intp right, npy_int max_iters) qsort_<vtype>(arr, pivot_index, right, max_iters - 1); } -static NPY_INLINE npy_intp +static inline npy_intp replace_nan_with_inf(npy_float *arr, npy_intp arrsize) { npy_intp nan_count = 0; @@ -790,7 +790,7 @@ replace_nan_with_inf(npy_float *arr, npy_intp arrsize) return nan_count; } -static NPY_INLINE void +static inline void replace_inf_with_nan(npy_float *arr, npy_intp arrsize, npy_intp nan_count) { for (npy_intp ii = arrsize - 1; nan_count > 0; --ii) { diff --git a/numpy/core/tests/data/generate_umath_validation_data.cpp b/numpy/core/tests/data/generate_umath_validation_data.cpp index 418eae670..51ee12501 100644 --- a/numpy/core/tests/data/generate_umath_validation_data.cpp +++ b/numpy/core/tests/data/generate_umath_validation_data.cpp @@ -1,10 +1,10 @@ #include <algorithm> #include <fstream> #include <iostream> -#include <math.h> +#include <cmath> #include <random> -#include <stdio.h> -#include <time.h> +#include <cstdio> +#include <ctime> #include <vector> struct ufunc { diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py index 0374ae8d7..eb9ae03c6 100755 --- a/numpy/f2py/crackfortran.py +++ b/numpy/f2py/crackfortran.py @@ -2689,7 +2689,7 @@ def analyzevars(block): n_checks = [] n_is_input = l_or(isintent_in, isintent_inout, isintent_inplace)(vars[n]) - if 'dimension' in vars[n]: # n is array + if isarray(vars[n]): # n is array for i, d in enumerate(vars[n]['dimension']): coeffs_and_deps = dimension_exprs.get(d) if coeffs_and_deps is None: @@ -2700,6 +2700,13 @@ def analyzevars(block): # may define variables used in dimension # specifications. for v, (solver, deps) in coeffs_and_deps.items(): + def compute_deps(v, deps): + for v1 in coeffs_and_deps.get(v, [None, []])[1]: + if v1 not in deps: + deps.add(v1) + compute_deps(v1, deps) + all_deps = set() + compute_deps(v, all_deps) if ((v in n_deps or '=' in vars[v] or 'depend' in vars[v])): @@ -2708,7 +2715,7 @@ def analyzevars(block): # - has user-defined initialization expression # - has user-defined dependencies continue - if solver is not None: + if solver is not None and v not in all_deps: # v can be solved from d, hence, we # make it an optional argument with # initialization expression: diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py index eaa559528..feb181bcb 100755 --- a/numpy/f2py/rules.py +++ b/numpy/f2py/rules.py @@ -50,7 +50,7 @@ $Date: 2005/08/30 08:58:42 $ Pearu Peterson """ -import os +import os, sys import time import copy @@ -1202,8 +1202,8 @@ def buildmodule(m, um): break if not nb: - errmess( - 'buildmodule: Could not found the body of interfaced routine "%s". Skipping.\n' % (n)) + print( + 'buildmodule: Could not find the body of interfaced routine "%s". Skipping.\n' % (n), file=sys.stderr) continue nb_list = [nb] if 'entry' in nb: diff --git a/numpy/f2py/tests/src/cli/hi77.f b/numpy/f2py/tests/src/cli/hi77.f new file mode 100644 index 000000000..8b916ebe0 --- /dev/null +++ b/numpy/f2py/tests/src/cli/hi77.f @@ -0,0 +1,3 @@ + SUBROUTINE HI + PRINT*, "HELLO WORLD" + END SUBROUTINE diff --git a/numpy/f2py/tests/src/cli/hiworld.f90 b/numpy/f2py/tests/src/cli/hiworld.f90 new file mode 100644 index 000000000..981f87754 --- /dev/null +++ b/numpy/f2py/tests/src/cli/hiworld.f90 @@ -0,0 +1,3 @@ +function hi() + print*, "Hello World" +end function diff --git a/numpy/f2py/tests/src/negative_bounds/issue_20853.f90 b/numpy/f2py/tests/src/negative_bounds/issue_20853.f90 new file mode 100644 index 000000000..bf1fa9285 --- /dev/null +++ b/numpy/f2py/tests/src/negative_bounds/issue_20853.f90 @@ -0,0 +1,7 @@ +subroutine foo(is_, ie_, arr, tout) + implicit none + integer :: is_,ie_ + real, intent(in) :: arr(is_:ie_) + real, intent(out) :: tout(is_:ie_) + tout = arr +end diff --git a/numpy/f2py/tests/test_f2py2e.py b/numpy/f2py/tests/test_f2py2e.py new file mode 100644 index 000000000..9de043d73 --- /dev/null +++ b/numpy/f2py/tests/test_f2py2e.py @@ -0,0 +1,748 @@ +import textwrap, re, sys, subprocess, shlex +from pathlib import Path +from collections import namedtuple + +import pytest + +from . import util +from numpy.f2py.f2py2e import main as f2pycli + +######################### +# CLI utils and classes # +######################### + +PPaths = namedtuple("PPaths", "finp, f90inp, pyf, wrap77, wrap90, cmodf") + + +def get_io_paths(fname_inp, mname="untitled"): + """Takes in a temporary file for testing and returns the expected output and input paths + + Here expected output is essentially one of any of the possible generated + files. + + ..note:: + + Since this does not actually run f2py, none of these are guaranteed to + exist, and module names are typically incorrect + + Parameters + ---------- + fname_inp : str + The input filename + mname : str, optional + The name of the module, untitled by default + + Returns + ------- + genp : NamedTuple PPaths + The possible paths which are generated, not all of which exist + """ + bpath = Path(fname_inp) + return PPaths( + finp=bpath.with_suffix(".f"), + f90inp=bpath.with_suffix(".f90"), + pyf=bpath.with_suffix(".pyf"), + wrap77=bpath.with_name(f"{mname}-f2pywrappers.f"), + wrap90=bpath.with_name(f"{mname}-f2pywrappers2.f90"), + cmodf=bpath.with_name(f"{mname}module.c"), + ) + + +############## +# CLI Fixtures and Tests # +############# + + +@pytest.fixture(scope="session") +def hello_world_f90(tmpdir_factory): + """Generates a single f90 file for testing""" + fdat = util.getpath("tests", "src", "cli", "hiworld.f90").read_text() + fn = tmpdir_factory.getbasetemp() / "hello.f90" + fn.write_text(fdat, encoding="ascii") + return fn + + +@pytest.fixture(scope="session") +def hello_world_f77(tmpdir_factory): + """Generates a single f77 file for testing""" + fdat = util.getpath("tests", "src", "cli", "hi77.f").read_text() + fn = tmpdir_factory.getbasetemp() / "hello.f" + fn.write_text(fdat, encoding="ascii") + return fn + + +@pytest.fixture(scope="session") +def retreal_f77(tmpdir_factory): + """Generates a single f77 file for testing""" + fdat = util.getpath("tests", "src", "return_real", "foo77.f").read_text() + fn = tmpdir_factory.getbasetemp() / "foo.f" + fn.write_text(fdat, encoding="ascii") + return fn + + +def test_gen_pyf(capfd, hello_world_f90, monkeypatch): + """Ensures that a signature file is generated via the CLI + CLI :: -h + """ + ipath = Path(hello_world_f90) + opath = Path(hello_world_f90).stem + ".pyf" + monkeypatch.setattr(sys, "argv", f'f2py -h {opath} {ipath}'.split()) + + with util.switchdir(ipath.parent): + f2pycli() # Generate wrappers + out, _ = capfd.readouterr() + assert "Saving signatures to file" in out + assert Path(f'{opath}').exists() + + +def test_gen_pyf_stdout(capfd, hello_world_f90, monkeypatch): + """Ensures that a signature file can be dumped to stdout + CLI :: -h + """ + ipath = Path(hello_world_f90) + monkeypatch.setattr(sys, "argv", f'f2py -h stdout {ipath}'.split()) + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "Saving signatures to file" in out + + +def test_gen_pyf_no_overwrite(capfd, hello_world_f90, monkeypatch): + """Ensures that the CLI refuses to overwrite signature files + CLI :: -h without --overwrite-signature + """ + ipath = Path(hello_world_f90) + monkeypatch.setattr(sys, "argv", f'f2py -h faker.pyf {ipath}'.split()) + + with util.switchdir(ipath.parent): + Path("faker.pyf").write_text("Fake news", encoding="ascii") + with pytest.raises(SystemExit): + f2pycli() # Refuse to overwrite + _, err = capfd.readouterr() + assert "Use --overwrite-signature to overwrite" in err + + +@pytest.mark.xfail +def test_f2py_skip(capfd, retreal_f77, monkeypatch): + """Tests that functions can be skipped + CLI :: skip: + """ + foutl = get_io_paths(retreal_f77, mname="test") + ipath = foutl.finp + toskip = "t0 t4 t8 sd s8 s4" + remaining = "td s0" + monkeypatch.setattr( + sys, "argv", + f'f2py {ipath} -m test skip: {toskip}'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, err = capfd.readouterr() + for skey in toskip.split(): + assert ( + f'buildmodule: Could not found the body of interfaced routine "{skey}". Skipping.' + in err) + for rkey in remaining.split(): + assert f'Constructing wrapper function "{rkey}"' in out + + +def test_f2py_only(capfd, retreal_f77, monkeypatch): + """Test that functions can be kept by only: + CLI :: only: + """ + foutl = get_io_paths(retreal_f77, mname="test") + ipath = foutl.finp + toskip = "t0 t4 t8 sd s8 s4" + tokeep = "td s0" + monkeypatch.setattr( + sys, "argv", + f'f2py {ipath} -m test only: {tokeep}'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, err = capfd.readouterr() + for skey in toskip.split(): + assert ( + f'buildmodule: Could not find the body of interfaced routine "{skey}". Skipping.' + in err) + for rkey in tokeep.split(): + assert f'Constructing wrapper function "{rkey}"' in out + + +def test_file_processing_switch(capfd, hello_world_f90, retreal_f77, + monkeypatch): + """Tests that it is possible to return to file processing mode + CLI :: : + BUG: numpy-gh #20520 + """ + foutl = get_io_paths(retreal_f77, mname="test") + ipath = foutl.finp + toskip = "t0 t4 t8 sd s8 s4" + ipath2 = Path(hello_world_f90) + tokeep = "td s0 hi" # hi is in ipath2 + mname = "blah" + monkeypatch.setattr( + sys, + "argv", + f'f2py {ipath} -m {mname} only: {tokeep} : {ipath2}'.split( + ), + ) + + with util.switchdir(ipath.parent): + f2pycli() + out, err = capfd.readouterr() + for skey in toskip.split(): + assert ( + f'buildmodule: Could not find the body of interfaced routine "{skey}". Skipping.' + in err) + for rkey in tokeep.split(): + assert f'Constructing wrapper function "{rkey}"' in out + + +def test_mod_gen_f77(capfd, hello_world_f90, monkeypatch): + """Checks the generation of files based on a module name + CLI :: -m + """ + MNAME = "hi" + foutl = get_io_paths(hello_world_f90, mname=MNAME) + ipath = foutl.f90inp + monkeypatch.setattr(sys, "argv", f'f2py {ipath} -m {MNAME}'.split()) + with util.switchdir(ipath.parent): + f2pycli() + + # Always generate C module + assert Path.exists(foutl.cmodf) + # File contains a function, check for F77 wrappers + assert Path.exists(foutl.wrap77) + + +def test_lower_cmod(capfd, hello_world_f77, monkeypatch): + """Lowers cases by flag or when -h is present + + CLI :: --[no-]lower + """ + foutl = get_io_paths(hello_world_f77, mname="test") + ipath = foutl.finp + capshi = re.compile(r"HI\(\)") + capslo = re.compile(r"hi\(\)") + # Case I: --lower is passed + monkeypatch.setattr(sys, "argv", f'f2py {ipath} -m test --lower'.split()) + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert capslo.search(out) is not None + assert capshi.search(out) is None + # Case II: --no-lower is passed + monkeypatch.setattr(sys, "argv", + f'f2py {ipath} -m test --no-lower'.split()) + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert capslo.search(out) is None + assert capshi.search(out) is not None + + +def test_lower_sig(capfd, hello_world_f77, monkeypatch): + """Lowers cases in signature files by flag or when -h is present + + CLI :: --[no-]lower -h + """ + foutl = get_io_paths(hello_world_f77, mname="test") + ipath = foutl.finp + # Signature files + capshi = re.compile(r"Block: HI") + capslo = re.compile(r"Block: hi") + # Case I: --lower is implied by -h + # TODO: Clean up to prevent passing --overwrite-signature + monkeypatch.setattr( + sys, + "argv", + f'f2py {ipath} -h {foutl.pyf} -m test --overwrite-signature'.split(), + ) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert capslo.search(out) is not None + assert capshi.search(out) is None + + # Case II: --no-lower overrides -h + monkeypatch.setattr( + sys, + "argv", + f'f2py {ipath} -h {foutl.pyf} -m test --overwrite-signature --no-lower' + .split(), + ) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert capslo.search(out) is None + assert capshi.search(out) is not None + + +def test_build_dir(capfd, hello_world_f90, monkeypatch): + """Ensures that the build directory can be specified + + CLI :: --build-dir + """ + ipath = Path(hello_world_f90) + mname = "blah" + odir = "tttmp" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --build-dir {odir}'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert f"Wrote C/API module \"{mname}\"" in out + + +def test_overwrite(capfd, hello_world_f90, monkeypatch): + """Ensures that the build directory can be specified + + CLI :: --overwrite-signature + """ + ipath = Path(hello_world_f90) + monkeypatch.setattr( + sys, "argv", + f'f2py -h faker.pyf {ipath} --overwrite-signature'.split()) + + with util.switchdir(ipath.parent): + Path("faker.pyf").write_text("Fake news", encoding="ascii") + f2pycli() + out, _ = capfd.readouterr() + assert "Saving signatures to file" in out + + +def test_latexdoc(capfd, hello_world_f90, monkeypatch): + """Ensures that TeX documentation is written out + + CLI :: --latex-doc + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --latex-doc'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "Documentation is saved to file" in out + with Path(f"{mname}module.tex").open() as otex: + assert "\\documentclass" in otex.read() + + +def test_nolatexdoc(capfd, hello_world_f90, monkeypatch): + """Ensures that TeX documentation is written out + + CLI :: --no-latex-doc + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --no-latex-doc'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "Documentation is saved to file" not in out + + +def test_shortlatex(capfd, hello_world_f90, monkeypatch): + """Ensures that truncated documentation is written out + + TODO: Test to ensure this has no effect without --latex-doc + CLI :: --latex-doc --short-latex + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr( + sys, + "argv", + f'f2py -m {mname} {ipath} --latex-doc --short-latex'.split(), + ) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "Documentation is saved to file" in out + with Path(f"./{mname}module.tex").open() as otex: + assert "\\documentclass" not in otex.read() + + +def test_restdoc(capfd, hello_world_f90, monkeypatch): + """Ensures that RsT documentation is written out + + CLI :: --rest-doc + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --rest-doc'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "ReST Documentation is saved to file" in out + with Path(f"./{mname}module.rest").open() as orst: + assert r".. -*- rest -*-" in orst.read() + + +def test_norestexdoc(capfd, hello_world_f90, monkeypatch): + """Ensures that TeX documentation is written out + + CLI :: --no-rest-doc + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --no-rest-doc'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "ReST Documentation is saved to file" not in out + + +def test_debugcapi(capfd, hello_world_f90, monkeypatch): + """Ensures that debugging wrappers are written + + CLI :: --debug-capi + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --debug-capi'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + with Path(f"./{mname}module.c").open() as ocmod: + assert r"#define DEBUGCFUNCS" in ocmod.read() + + +@pytest.mark.xfail(reason="Consistently fails on CI.") +def test_debugcapi_bld(hello_world_f90, monkeypatch): + """Ensures that debugging wrappers work + + CLI :: --debug-capi -c + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} -c --debug-capi'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + cmd_run = shlex.split("python3 -c \"import blah; blah.hi()\"") + rout = subprocess.run(cmd_run, capture_output=True, encoding='UTF-8') + eout = ' Hello World\n' + eerr = textwrap.dedent("""\ +debug-capi:Python C/API function blah.hi() +debug-capi:float hi=:output,hidden,scalar +debug-capi:hi=0 +debug-capi:Fortran subroutine `f2pywraphi(&hi)' +debug-capi:hi=0 +debug-capi:Building return value. +debug-capi:Python C/API function blah.hi: successful. +debug-capi:Freeing memory. + """) + assert rout.stdout == eout + assert rout.stderr == eerr + + +def test_wrapfunc_def(capfd, hello_world_f90, monkeypatch): + """Ensures that fortran subroutine wrappers for F77 are included by default + + CLI :: --[no]-wrap-functions + """ + # Implied + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", f'f2py -m {mname} {ipath}'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert r"Fortran 77 wrappers are saved to" in out + + # Explicit + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --wrap-functions'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert r"Fortran 77 wrappers are saved to" in out + + +def test_nowrapfunc(capfd, hello_world_f90, monkeypatch): + """Ensures that fortran subroutine wrappers for F77 can be disabled + + CLI :: --no-wrap-functions + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr(sys, "argv", + f'f2py -m {mname} {ipath} --no-wrap-functions'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert r"Fortran 77 wrappers are saved to" not in out + + +def test_inclheader(capfd, hello_world_f90, monkeypatch): + """Add to the include directories + + CLI :: -include + TODO: Document this in the help string + """ + ipath = Path(hello_world_f90) + mname = "blah" + monkeypatch.setattr( + sys, + "argv", + f'f2py -m {mname} {ipath} -include<stdbool.h> -include<stdio.h> '. + split(), + ) + + with util.switchdir(ipath.parent): + f2pycli() + with Path(f"./{mname}module.c").open() as ocmod: + ocmr = ocmod.read() + assert "#include <stdbool.h>" in ocmr + assert "#include <stdio.h>" in ocmr + + +def test_inclpath(): + """Add to the include directories + + CLI :: --include-paths + """ + # TODO: populate + pass + + +def test_hlink(): + """Add to the include directories + + CLI :: --help-link + """ + # TODO: populate + pass + + +def test_f2cmap(): + """Check that Fortran-to-Python KIND specs can be passed + + CLI :: --f2cmap + """ + # TODO: populate + pass + + +def test_quiet(capfd, hello_world_f90, monkeypatch): + """Reduce verbosity + + CLI :: --quiet + """ + ipath = Path(hello_world_f90) + monkeypatch.setattr(sys, "argv", f'f2py -m blah {ipath} --quiet'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert len(out) == 0 + + +def test_verbose(capfd, hello_world_f90, monkeypatch): + """Increase verbosity + + CLI :: --verbose + """ + ipath = Path(hello_world_f90) + monkeypatch.setattr(sys, "argv", f'f2py -m blah {ipath} --verbose'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + out, _ = capfd.readouterr() + assert "analyzeline" in out + + +def test_version(capfd, monkeypatch): + """Ensure version + + CLI :: -v + """ + monkeypatch.setattr(sys, "argv", 'f2py -v'.split()) + # TODO: f2py2e should not call sys.exit() after printing the version + with pytest.raises(SystemExit): + f2pycli() + out, _ = capfd.readouterr() + import numpy as np + assert np.__version__ == out.strip() + + +@pytest.mark.xfail(reason="Consistently fails on CI.") +def test_npdistop(hello_world_f90, monkeypatch): + """ + CLI :: -c + """ + ipath = Path(hello_world_f90) + monkeypatch.setattr(sys, "argv", f'f2py -m blah {ipath} -c'.split()) + + with util.switchdir(ipath.parent): + f2pycli() + cmd_run = shlex.split("python -c \"import blah; blah.hi()\"") + rout = subprocess.run(cmd_run, capture_output=True, encoding='UTF-8') + eout = ' Hello World\n' + assert rout.stdout == eout + + +# Numpy distutils flags +# TODO: These should be tested separately + + +def test_npd_fcompiler(): + """ + CLI :: -c --fcompiler + """ + # TODO: populate + pass + + +def test_npd_compiler(): + """ + CLI :: -c --compiler + """ + # TODO: populate + pass + + +def test_npd_help_fcompiler(): + """ + CLI :: -c --help-fcompiler + """ + # TODO: populate + pass + + +def test_npd_f77exec(): + """ + CLI :: -c --f77exec + """ + # TODO: populate + pass + + +def test_npd_f90exec(): + """ + CLI :: -c --f90exec + """ + # TODO: populate + pass + + +def test_npd_f77flags(): + """ + CLI :: -c --f77flags + """ + # TODO: populate + pass + + +def test_npd_f90flags(): + """ + CLI :: -c --f90flags + """ + # TODO: populate + pass + + +def test_npd_opt(): + """ + CLI :: -c --opt + """ + # TODO: populate + pass + + +def test_npd_arch(): + """ + CLI :: -c --arch + """ + # TODO: populate + pass + + +def test_npd_noopt(): + """ + CLI :: -c --noopt + """ + # TODO: populate + pass + + +def test_npd_noarch(): + """ + CLI :: -c --noarch + """ + # TODO: populate + pass + + +def test_npd_debug(): + """ + CLI :: -c --debug + """ + # TODO: populate + pass + + +def test_npd_link_auto(): + """ + CLI :: -c --link-<resource> + """ + # TODO: populate + pass + + +def test_npd_lib(): + """ + CLI :: -c -L/path/to/lib/ -l<libname> + """ + # TODO: populate + pass + + +def test_npd_define(): + """ + CLI :: -D<define> + """ + # TODO: populate + pass + + +def test_npd_undefine(): + """ + CLI :: -U<name> + """ + # TODO: populate + pass + + +def test_npd_incl(): + """ + CLI :: -I/path/to/include/ + """ + # TODO: populate + pass + + +def test_npd_linker(): + """ + CLI :: <filename>.o <filename>.so <filename>.a + """ + # TODO: populate + pass diff --git a/numpy/f2py/tests/test_regression.py b/numpy/f2py/tests/test_regression.py index 40b9d4327..044f952f2 100644 --- a/numpy/f2py/tests/test_regression.py +++ b/numpy/f2py/tests/test_regression.py @@ -22,6 +22,25 @@ class TestIntentInOut(util.F2PyTest): assert np.allclose(x, [3, 1, 2]) +class TestNegativeBounds(util.F2PyTest): + # Check that negative bounds work correctly + sources = [util.getpath("tests", "src", "negative_bounds", "issue_20853.f90")] + + @pytest.mark.slow + def test_negbound(self): + xvec = np.arange(12) + xlow = -6 + xhigh = 4 + # Calculate the upper bound, + # Keeping the 1 index in mind + def ubound(xl, xh): + return xh - xl + 1 + rval = self.module.foo(is_=xlow, ie_=xhigh, + arr=xvec[:ubound(xlow, xhigh)]) + expval = np.arange(11, dtype = np.float32) + assert np.allclose(rval, expval) + + class TestNumpyVersionAttribute(util.F2PyTest): # Check that th attribute __f2py_numpy_version__ is present # in the compiled module and that has the value np.__version__. diff --git a/numpy/linalg/setup.py b/numpy/linalg/setup.py index 94536bb2c..82c688d91 100644 --- a/numpy/linalg/setup.py +++ b/numpy/linalg/setup.py @@ -69,9 +69,13 @@ def configuration(parent_package='', top_path=None): # umath_linalg module config.add_extension( '_umath_linalg', - sources=['umath_linalg.c.src', get_lapack_lite_sources], + sources=['umath_linalg.cpp', get_lapack_lite_sources], depends=['lapack_lite/f2c.h'], extra_info=lapack_info, + extra_cxx_compile_args=['-std=c++11', + '-D__STDC_VERSION__=0', + '-fno-exceptions', + '-fno-rtti'], libraries=['npymath'], ) config.add_data_files('*.pyi') diff --git a/numpy/linalg/umath_linalg.c.src b/numpy/linalg/umath_linalg.cpp index f8a154445..bbeb37906 100644 --- a/numpy/linalg/umath_linalg.c.src +++ b/numpy/linalg/umath_linalg.cpp @@ -18,14 +18,20 @@ #include "npy_cblas.h" -#include <stddef.h> -#include <stdio.h> -#include <assert.h> -#include <math.h> +#include <cstddef> +#include <cstdio> +#include <cassert> +#include <cmath> +#include <utility> static const char* umath_linalg_version_string = "0.1.5"; +struct scalar_trait {}; +struct complex_trait {}; +template<typename typ> +using dispatch_scalar = typename std::conditional<std::is_scalar<typ>::value, scalar_trait, complex_trait>::type; + /* **************************************************************************** * Debugging support * @@ -78,28 +84,28 @@ typedef double fortran_doublereal; typedef f2c_complex fortran_complex; typedef f2c_doublecomplex fortran_doublecomplex; -extern fortran_int +extern "C" fortran_int FNAME(sgeev)(char *jobvl, char *jobvr, fortran_int *n, float a[], fortran_int *lda, float wr[], float wi[], float vl[], fortran_int *ldvl, float vr[], fortran_int *ldvr, float work[], fortran_int lwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dgeev)(char *jobvl, char *jobvr, fortran_int *n, double a[], fortran_int *lda, double wr[], double wi[], double vl[], fortran_int *ldvl, double vr[], fortran_int *ldvr, double work[], fortran_int lwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cgeev)(char *jobvl, char *jobvr, fortran_int *n, - f2c_doublecomplex a[], fortran_int *lda, - f2c_doublecomplex w[], - f2c_doublecomplex vl[], fortran_int *ldvl, - f2c_doublecomplex vr[], fortran_int *ldvr, - f2c_doublecomplex work[], fortran_int *lwork, - double rwork[], + f2c_complex a[], fortran_int *lda, + f2c_complex w[], + f2c_complex vl[], fortran_int *ldvl, + f2c_complex vr[], fortran_int *ldvr, + f2c_complex work[], fortran_int *lwork, + float rwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zgeev)(char *jobvl, char *jobvr, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda, f2c_doublecomplex w[], @@ -109,24 +115,24 @@ FNAME(zgeev)(char *jobvl, char *jobvr, fortran_int *n, double rwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(ssyevd)(char *jobz, char *uplo, fortran_int *n, float a[], fortran_int *lda, float w[], float work[], fortran_int *lwork, fortran_int iwork[], fortran_int *liwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dsyevd)(char *jobz, char *uplo, fortran_int *n, double a[], fortran_int *lda, double w[], double work[], fortran_int *lwork, fortran_int iwork[], fortran_int *liwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cheevd)(char *jobz, char *uplo, fortran_int *n, f2c_complex a[], fortran_int *lda, float w[], f2c_complex work[], fortran_int *lwork, float rwork[], fortran_int *lrwork, fortran_int iwork[], fortran_int *liwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zheevd)(char *jobz, char *uplo, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda, double w[], f2c_doublecomplex work[], @@ -134,19 +140,19 @@ FNAME(zheevd)(char *jobz, char *uplo, fortran_int *n, fortran_int *liwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(sgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs, float a[], fortran_int *lda, float b[], fortran_int *ldb, float s[], float *rcond, fortran_int *rank, float work[], fortran_int *lwork, fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs, double a[], fortran_int *lda, double b[], fortran_int *ldb, double s[], double *rcond, fortran_int *rank, double work[], fortran_int *lwork, fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs, f2c_complex a[], fortran_int *lda, f2c_complex b[], fortran_int *ldb, @@ -154,7 +160,7 @@ FNAME(cgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs, f2c_complex work[], fortran_int *lwork, float rwork[], fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs, f2c_doublecomplex a[], fortran_int *lda, f2c_doublecomplex b[], fortran_int *ldb, @@ -163,105 +169,105 @@ FNAME(zgelsd)(fortran_int *m, fortran_int *n, fortran_int *nrhs, double rwork[], fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dgeqrf)(fortran_int *m, fortran_int *n, double a[], fortran_int *lda, double tau[], double work[], fortran_int *lwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zgeqrf)(fortran_int *m, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda, f2c_doublecomplex tau[], f2c_doublecomplex work[], fortran_int *lwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dorgqr)(fortran_int *m, fortran_int *n, fortran_int *k, double a[], fortran_int *lda, double tau[], double work[], fortran_int *lwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zungqr)(fortran_int *m, fortran_int *n, fortran_int *k, f2c_doublecomplex a[], fortran_int *lda, f2c_doublecomplex tau[], f2c_doublecomplex work[], fortran_int *lwork, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(sgesv)(fortran_int *n, fortran_int *nrhs, float a[], fortran_int *lda, fortran_int ipiv[], float b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dgesv)(fortran_int *n, fortran_int *nrhs, double a[], fortran_int *lda, fortran_int ipiv[], double b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cgesv)(fortran_int *n, fortran_int *nrhs, f2c_complex a[], fortran_int *lda, fortran_int ipiv[], f2c_complex b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zgesv)(fortran_int *n, fortran_int *nrhs, f2c_doublecomplex a[], fortran_int *lda, fortran_int ipiv[], f2c_doublecomplex b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(sgetrf)(fortran_int *m, fortran_int *n, float a[], fortran_int *lda, fortran_int ipiv[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dgetrf)(fortran_int *m, fortran_int *n, double a[], fortran_int *lda, fortran_int ipiv[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cgetrf)(fortran_int *m, fortran_int *n, f2c_complex a[], fortran_int *lda, fortran_int ipiv[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zgetrf)(fortran_int *m, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda, fortran_int ipiv[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(spotrf)(char *uplo, fortran_int *n, float a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dpotrf)(char *uplo, fortran_int *n, double a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cpotrf)(char *uplo, fortran_int *n, f2c_complex a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zpotrf)(char *uplo, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(sgesdd)(char *jobz, fortran_int *m, fortran_int *n, float a[], fortran_int *lda, float s[], float u[], fortran_int *ldu, float vt[], fortran_int *ldvt, float work[], fortran_int *lwork, fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dgesdd)(char *jobz, fortran_int *m, fortran_int *n, double a[], fortran_int *lda, double s[], double u[], fortran_int *ldu, double vt[], fortran_int *ldvt, double work[], fortran_int *lwork, fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cgesdd)(char *jobz, fortran_int *m, fortran_int *n, f2c_complex a[], fortran_int *lda, float s[], f2c_complex u[], fortran_int *ldu, f2c_complex vt[], fortran_int *ldvt, f2c_complex work[], fortran_int *lwork, float rwork[], fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zgesdd)(char *jobz, fortran_int *m, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda, double s[], f2c_doublecomplex u[], fortran_int *ldu, @@ -269,87 +275,87 @@ FNAME(zgesdd)(char *jobz, fortran_int *m, fortran_int *n, f2c_doublecomplex work[], fortran_int *lwork, double rwork[], fortran_int iwork[], fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(spotrs)(char *uplo, fortran_int *n, fortran_int *nrhs, float a[], fortran_int *lda, float b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dpotrs)(char *uplo, fortran_int *n, fortran_int *nrhs, double a[], fortran_int *lda, double b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cpotrs)(char *uplo, fortran_int *n, fortran_int *nrhs, f2c_complex a[], fortran_int *lda, f2c_complex b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zpotrs)(char *uplo, fortran_int *n, fortran_int *nrhs, f2c_doublecomplex a[], fortran_int *lda, f2c_doublecomplex b[], fortran_int *ldb, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(spotri)(char *uplo, fortran_int *n, float a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(dpotri)(char *uplo, fortran_int *n, double a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(cpotri)(char *uplo, fortran_int *n, f2c_complex a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(zpotri)(char *uplo, fortran_int *n, f2c_doublecomplex a[], fortran_int *lda, fortran_int *info); -extern fortran_int +extern "C" fortran_int FNAME(scopy)(fortran_int *n, float *sx, fortran_int *incx, float *sy, fortran_int *incy); -extern fortran_int +extern "C" fortran_int FNAME(dcopy)(fortran_int *n, double *sx, fortran_int *incx, double *sy, fortran_int *incy); -extern fortran_int +extern "C" fortran_int FNAME(ccopy)(fortran_int *n, f2c_complex *sx, fortran_int *incx, f2c_complex *sy, fortran_int *incy); -extern fortran_int +extern "C" fortran_int FNAME(zcopy)(fortran_int *n, f2c_doublecomplex *sx, fortran_int *incx, f2c_doublecomplex *sy, fortran_int *incy); -extern float +extern "C" float FNAME(sdot)(fortran_int *n, float *sx, fortran_int *incx, float *sy, fortran_int *incy); -extern double +extern "C" double FNAME(ddot)(fortran_int *n, double *sx, fortran_int *incx, double *sy, fortran_int *incy); -extern void +extern "C" void FNAME(cdotu)(f2c_complex *ret, fortran_int *n, f2c_complex *sx, fortran_int *incx, f2c_complex *sy, fortran_int *incy); -extern void +extern "C" void FNAME(zdotu)(f2c_doublecomplex *ret, fortran_int *n, f2c_doublecomplex *sx, fortran_int *incx, f2c_doublecomplex *sy, fortran_int *incy); -extern void +extern "C" void FNAME(cdotc)(f2c_complex *ret, fortran_int *n, f2c_complex *sx, fortran_int *incx, f2c_complex *sy, fortran_int *incy); -extern void +extern "C" void FNAME(zdotc)(f2c_doublecomplex *ret, fortran_int *n, f2c_doublecomplex *sx, fortran_int *incx, f2c_doublecomplex *sy, fortran_int *incy); -extern fortran_int +extern "C" fortran_int FNAME(sgemm)(char *transa, char *transb, fortran_int *m, fortran_int *n, fortran_int *k, float *alpha, @@ -357,7 +363,7 @@ FNAME(sgemm)(char *transa, char *transb, float *b, fortran_int *ldb, float *beta, float *c, fortran_int *ldc); -extern fortran_int +extern "C" fortran_int FNAME(dgemm)(char *transa, char *transb, fortran_int *m, fortran_int *n, fortran_int *k, double *alpha, @@ -365,7 +371,7 @@ FNAME(dgemm)(char *transa, char *transb, double *b, fortran_int *ldb, double *beta, double *c, fortran_int *ldc); -extern fortran_int +extern "C" fortran_int FNAME(cgemm)(char *transa, char *transb, fortran_int *m, fortran_int *n, fortran_int *k, f2c_complex *alpha, @@ -373,7 +379,7 @@ FNAME(cgemm)(char *transa, char *transb, f2c_complex *b, fortran_int *ldb, f2c_complex *beta, f2c_complex *c, fortran_int *ldc); -extern fortran_int +extern "C" fortran_int FNAME(zgemm)(char *transa, char *transb, fortran_int *m, fortran_int *n, fortran_int *k, f2c_doublecomplex *alpha, @@ -400,7 +406,7 @@ FNAME(zgemm)(char *transa, char *transb, ***************************************************************************** */ -static NPY_INLINE int +static inline int get_fp_invalid_and_clear(void) { int status; @@ -408,7 +414,7 @@ get_fp_invalid_and_clear(void) return !!(status & NPY_FPE_INVALID); } -static NPY_INLINE void +static inline void set_fp_invalid_or_clear(int error_occurred) { if (error_occurred) { @@ -427,80 +433,92 @@ set_fp_invalid_or_clear(int error_occurred) #define UMATH_LINALG_MODULE_NAME "_umath_linalg" -typedef union { - fortran_complex f; - npy_cfloat npy; - float array[2]; -} COMPLEX_t; - -typedef union { - fortran_doublecomplex f; - npy_cdouble npy; - double array[2]; -} DOUBLECOMPLEX_t; - -static float s_one; -static float s_zero; -static float s_minus_one; -static float s_ninf; -static float s_nan; -static double d_one; -static double d_zero; -static double d_minus_one; -static double d_ninf; -static double d_nan; -static COMPLEX_t c_one; -static COMPLEX_t c_zero; -static COMPLEX_t c_minus_one; -static COMPLEX_t c_ninf; -static COMPLEX_t c_nan; -static DOUBLECOMPLEX_t z_one; -static DOUBLECOMPLEX_t z_zero; -static DOUBLECOMPLEX_t z_minus_one; -static DOUBLECOMPLEX_t z_ninf; -static DOUBLECOMPLEX_t z_nan; - -static void init_constants(void) -{ - /* - this is needed as NPY_INFINITY and NPY_NAN macros - can't be used as initializers. I prefer to just set - all the constants the same way. - */ - s_one = 1.0f; - s_zero = 0.0f; - s_minus_one = -1.0f; - s_ninf = -NPY_INFINITYF; - s_nan = NPY_NANF; - - d_one = 1.0; - d_zero = 0.0; - d_minus_one = -1.0; - d_ninf = -NPY_INFINITY; - d_nan = NPY_NAN; - - c_one.array[0] = 1.0f; - c_one.array[1] = 0.0f; - c_zero.array[0] = 0.0f; - c_zero.array[1] = 0.0f; - c_minus_one.array[0] = -1.0f; - c_minus_one.array[1] = 0.0f; - c_ninf.array[0] = -NPY_INFINITYF; - c_ninf.array[1] = 0.0f; - c_nan.array[0] = NPY_NANF; - c_nan.array[1] = NPY_NANF; - - z_one.array[0] = 1.0; - z_one.array[1] = 0.0; - z_zero.array[0] = 0.0; - z_zero.array[1] = 0.0; - z_minus_one.array[0] = -1.0; - z_minus_one.array[1] = 0.0; - z_ninf.array[0] = -NPY_INFINITY; - z_ninf.array[1] = 0.0; - z_nan.array[0] = NPY_NAN; - z_nan.array[1] = NPY_NAN; -} +template<typename T> +struct numeric_limits; + +template<> +struct numeric_limits<float> { +static constexpr float one = 1.0f; +static constexpr float zero = 0.0f; +static constexpr float minus_one = -1.0f; +static const float ninf; +static const float nan; +}; +constexpr float numeric_limits<float>::one; +constexpr float numeric_limits<float>::zero; +constexpr float numeric_limits<float>::minus_one; +const float numeric_limits<float>::ninf = -NPY_INFINITYF; +const float numeric_limits<float>::nan = NPY_NANF; + +template<> +struct numeric_limits<double> { +static constexpr double one = 1.0; +static constexpr double zero = 0.0; +static constexpr double minus_one = -1.0; +static const double ninf; +static const double nan; +}; +constexpr double numeric_limits<double>::one; +constexpr double numeric_limits<double>::zero; +constexpr double numeric_limits<double>::minus_one; +const double numeric_limits<double>::ninf = -NPY_INFINITY; +const double numeric_limits<double>::nan = NPY_NAN; + +template<> +struct numeric_limits<npy_cfloat> { +static constexpr npy_cfloat one = {1.0f, 0.0f}; +static constexpr npy_cfloat zero = {0.0f, 0.0f}; +static constexpr npy_cfloat minus_one = {-1.0f, 0.0f}; +static const npy_cfloat ninf; +static const npy_cfloat nan; +}; +constexpr npy_cfloat numeric_limits<npy_cfloat>::one; +constexpr npy_cfloat numeric_limits<npy_cfloat>::zero; +constexpr npy_cfloat numeric_limits<npy_cfloat>::minus_one; +const npy_cfloat numeric_limits<npy_cfloat>::ninf = {-NPY_INFINITYF, 0.0f}; +const npy_cfloat numeric_limits<npy_cfloat>::nan = {NPY_NANF, NPY_NANF}; + +template<> +struct numeric_limits<f2c_complex> { +static constexpr f2c_complex one = {1.0f, 0.0f}; +static constexpr f2c_complex zero = {0.0f, 0.0f}; +static constexpr f2c_complex minus_one = {-1.0f, 0.0f}; +static const f2c_complex ninf; +static const f2c_complex nan; +}; +constexpr f2c_complex numeric_limits<f2c_complex>::one; +constexpr f2c_complex numeric_limits<f2c_complex>::zero; +constexpr f2c_complex numeric_limits<f2c_complex>::minus_one; +const f2c_complex numeric_limits<f2c_complex>::ninf = {-NPY_INFINITYF, 0.0f}; +const f2c_complex numeric_limits<f2c_complex>::nan = {NPY_NANF, NPY_NANF}; + +template<> +struct numeric_limits<npy_cdouble> { +static constexpr npy_cdouble one = {1.0, 0.0}; +static constexpr npy_cdouble zero = {0.0, 0.0}; +static constexpr npy_cdouble minus_one = {-1.0, 0.0}; +static const npy_cdouble ninf; +static const npy_cdouble nan; +}; +constexpr npy_cdouble numeric_limits<npy_cdouble>::one; +constexpr npy_cdouble numeric_limits<npy_cdouble>::zero; +constexpr npy_cdouble numeric_limits<npy_cdouble>::minus_one; +const npy_cdouble numeric_limits<npy_cdouble>::ninf = {-NPY_INFINITY, 0.0}; +const npy_cdouble numeric_limits<npy_cdouble>::nan = {NPY_NAN, NPY_NAN}; + +template<> +struct numeric_limits<f2c_doublecomplex> { +static constexpr f2c_doublecomplex one = {1.0, 0.0}; +static constexpr f2c_doublecomplex zero = {0.0, 0.0}; +static constexpr f2c_doublecomplex minus_one = {-1.0, 0.0}; +static const f2c_doublecomplex ninf; +static const f2c_doublecomplex nan; +}; +constexpr f2c_doublecomplex numeric_limits<f2c_doublecomplex>::one; +constexpr f2c_doublecomplex numeric_limits<f2c_doublecomplex>::zero; +constexpr f2c_doublecomplex numeric_limits<f2c_doublecomplex>::minus_one; +const f2c_doublecomplex numeric_limits<f2c_doublecomplex>::ninf = {-NPY_INFINITY, 0.0}; +const f2c_doublecomplex numeric_limits<f2c_doublecomplex>::nan = {NPY_NAN, NPY_NAN}; /* ***************************************************************************** @@ -529,7 +547,7 @@ typedef struct linearize_data_struct npy_intp output_lead_dim; } LINEARIZE_DATA_t; -static NPY_INLINE void +static inline void init_linearize_data_ex(LINEARIZE_DATA_t *lin_data, npy_intp rows, npy_intp columns, @@ -544,7 +562,7 @@ init_linearize_data_ex(LINEARIZE_DATA_t *lin_data, lin_data->output_lead_dim = output_lead_dim; } -static NPY_INLINE void +static inline void init_linearize_data(LINEARIZE_DATA_t *lin_data, npy_intp rows, npy_intp columns, @@ -555,7 +573,7 @@ init_linearize_data(LINEARIZE_DATA_t *lin_data, lin_data, rows, columns, row_strides, column_strides, columns); } -static NPY_INLINE void +static inline void dump_ufunc_object(PyUFuncObject* ufunc) { TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n", @@ -580,7 +598,7 @@ dump_ufunc_object(PyUFuncObject* ufunc) } } -static NPY_INLINE void +static inline void dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params) { TRACE_TXT("\n\t%s rows: %zd columns: %zd"\ @@ -589,37 +607,34 @@ dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params) params->row_strides, params->column_strides); } -static NPY_INLINE void -print_FLOAT(npy_float s) +static inline void +print(npy_float s) { TRACE_TXT(" %8.4f", s); } -static NPY_INLINE void -print_DOUBLE(npy_double d) +static inline void +print(npy_double d) { TRACE_TXT(" %10.6f", d); } -static NPY_INLINE void -print_CFLOAT(npy_cfloat c) +static inline void +print(npy_cfloat c) { float* c_parts = (float*)&c; TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]); } -static NPY_INLINE void -print_CDOUBLE(npy_cdouble z) +static inline void +print(npy_cdouble z) { double* z_parts = (double*)&z; TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]); } -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# - */ -static NPY_INLINE void -dump_@TYPE@_matrix(const char* name, +template<typename typ> +static inline void +dump_matrix(const char* name, size_t rows, size_t columns, - const @typ@* ptr) + const typ* ptr) { size_t i, j; @@ -629,13 +644,12 @@ dump_@TYPE@_matrix(const char* name, TRACE_TXT("| "); for (j = 0; j < columns; j++) { - print_@TYPE@(ptr[j*rows + i]); + print(ptr[j*rows + i]); TRACE_TXT(", "); } TRACE_TXT(" |\n"); } } -/**end repeat**/ /* @@ -644,12 +658,12 @@ dump_@TYPE@_matrix(const char* name, ***************************************************************************** */ -static NPY_INLINE fortran_int +static inline fortran_int fortran_int_min(fortran_int x, fortran_int y) { return x < y ? x : y; } -static NPY_INLINE fortran_int +static inline fortran_int fortran_int_max(fortran_int x, fortran_int y) { return x > y ? x : y; } @@ -736,7 +750,7 @@ fortran_int_max(fortran_int x, fortran_int y) { #define END_OUTER_LOOP } -static NPY_INLINE void +static inline void update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count) { size_t i; @@ -754,45 +768,100 @@ update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count) /* ***************************************************************************** + ** DISPATCHER FUNCS ** + ***************************************************************************** + */ +static fortran_int copy(fortran_int *n, + float *sx, fortran_int *incx, + float *sy, fortran_int *incy) { return FNAME(scopy)(n, sx, incx, + sy, incy); +} +static fortran_int copy(fortran_int *n, + double *sx, fortran_int *incx, + double *sy, fortran_int *incy) { return FNAME(dcopy)(n, sx, incx, + sy, incy); +} +static fortran_int copy(fortran_int *n, + f2c_complex *sx, fortran_int *incx, + f2c_complex *sy, fortran_int *incy) { return FNAME(ccopy)(n, sx, incx, + sy, incy); +} +static fortran_int copy(fortran_int *n, + f2c_doublecomplex *sx, fortran_int *incx, + f2c_doublecomplex *sy, fortran_int *incy) { return FNAME(zcopy)(n, sx, incx, + sy, incy); +} + +static fortran_int getrf(fortran_int *m, fortran_int *n, float a[], fortran_int +*lda, fortran_int ipiv[], fortran_int *info) { + return LAPACK(sgetrf)(m, n, a, lda, ipiv, info); +} +static fortran_int getrf(fortran_int *m, fortran_int *n, double a[], fortran_int +*lda, fortran_int ipiv[], fortran_int *info) { + return LAPACK(dgetrf)(m, n, a, lda, ipiv, info); +} +static fortran_int getrf(fortran_int *m, fortran_int *n, f2c_complex a[], fortran_int +*lda, fortran_int ipiv[], fortran_int *info) { + return LAPACK(cgetrf)(m, n, a, lda, ipiv, info); +} +static fortran_int getrf(fortran_int *m, fortran_int *n, f2c_doublecomplex a[], fortran_int +*lda, fortran_int ipiv[], fortran_int *info) { + return LAPACK(zgetrf)(m, n, a, lda, ipiv, info); +} + +/* + ***************************************************************************** ** HELPER FUNCS ** ***************************************************************************** */ +template<typename T> +struct fortran_type { +using type = T; +}; + +template<> struct fortran_type<npy_cfloat> { using type = f2c_complex;}; +template<> struct fortran_type<npy_cdouble> { using type = f2c_doublecomplex;}; +template<typename T> +using fortran_type_t = typename fortran_type<T>::type; + +template<typename T> +struct basetype { +using type = T; +}; +template<> struct basetype<npy_cfloat> { using type = npy_float;}; +template<> struct basetype<npy_cdouble> { using type = npy_double;}; +template<> struct basetype<f2c_complex> { using type = fortran_real;}; +template<> struct basetype<f2c_doublecomplex> { using type = fortran_doublereal;}; +template<typename T> +using basetype_t = typename basetype<T>::type; /* rearranging of 2D matrices using blas */ -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t# - #copy = scopy, dcopy, ccopy, zcopy# - #nan = s_nan, d_nan, c_nan, z_nan# - #zero = s_zero, d_zero, c_zero, z_zero# - */ -static NPY_INLINE void * -linearize_@TYPE@_matrix(void *dst_in, - void *src_in, +template<typename typ> +static inline void * +linearize_matrix(typ *dst, + typ *src, const LINEARIZE_DATA_t* data) { - @typ@ *src = (@typ@ *) src_in; - @typ@ *dst = (@typ@ *) dst_in; - + using ftyp = fortran_type_t<typ>; if (dst) { int i, j; - @typ@* rv = dst; + typ* rv = dst; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = - (fortran_int)(data->column_strides/sizeof(@typ@)); + (fortran_int)(data->column_strides/sizeof(typ)); fortran_int one = 1; for (i = 0; i < data->rows; i++) { if (column_strides > 0) { - FNAME(@copy@)(&columns, - (void*)src, &column_strides, - (void*)dst, &one); + copy(&columns, + (ftyp*)src, &column_strides, + (ftyp*)dst, &one); } else if (column_strides < 0) { - FNAME(@copy@)(&columns, - (void*)((@typ@*)src + (columns-1)*column_strides), + copy(&columns, + ((ftyp*)src + (columns-1)*column_strides), &column_strides, - (void*)dst, &one); + (ftyp*)dst, &one); } else { /* @@ -801,10 +870,10 @@ linearize_@TYPE@_matrix(void *dst_in, * manually */ for (j = 0; j < columns; ++j) { - memcpy((@typ@*)dst + j, (@typ@*)src, sizeof(@typ@)); + memcpy(dst + j, src, sizeof(typ)); } } - src += data->row_strides/sizeof(@typ@); + src += data->row_strides/sizeof(typ); dst += data->output_lead_dim; } return rv; @@ -813,31 +882,31 @@ linearize_@TYPE@_matrix(void *dst_in, } } -static NPY_INLINE void * -delinearize_@TYPE@_matrix(void *dst_in, - void *src_in, +template<typename typ> +static inline void * +delinearize_matrix(typ *dst, + typ *src, const LINEARIZE_DATA_t* data) { - @typ@ *src = (@typ@ *) src_in; - @typ@ *dst = (@typ@ *) dst_in; +using ftyp = fortran_type_t<typ>; if (src) { int i; - @typ@ *rv = src; + typ *rv = src; fortran_int columns = (fortran_int)data->columns; fortran_int column_strides = - (fortran_int)(data->column_strides/sizeof(@typ@)); + (fortran_int)(data->column_strides/sizeof(typ)); fortran_int one = 1; for (i = 0; i < data->rows; i++) { if (column_strides > 0) { - FNAME(@copy@)(&columns, - (void*)src, &one, - (void*)dst, &column_strides); + copy(&columns, + (ftyp*)src, &one, + (ftyp*)dst, &column_strides); } else if (column_strides < 0) { - FNAME(@copy@)(&columns, - (void*)src, &one, - (void*)((@typ@*)dst + (columns-1)*column_strides), + copy(&columns, + (ftyp*)src, &one, + ((ftyp*)dst + (columns-1)*column_strides), &column_strides); } else { @@ -847,13 +916,13 @@ delinearize_@TYPE@_matrix(void *dst_in, * manually */ if (columns > 0) { - memcpy((@typ@*)dst, - (@typ@*)src + (columns-1), - sizeof(@typ@)); + memcpy(dst, + src + (columns-1), + sizeof(typ)); } } src += data->output_lead_dim; - dst += data->row_strides/sizeof(@typ@); + dst += data->row_strides/sizeof(typ); } return rv; @@ -862,116 +931,97 @@ delinearize_@TYPE@_matrix(void *dst_in, } } -static NPY_INLINE void -nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data) +template<typename typ> +static inline void +nan_matrix(typ *dst, const LINEARIZE_DATA_t* data) { - @typ@ *dst = (@typ@ *) dst_in; - int i, j; for (i = 0; i < data->rows; i++) { - @typ@ *cp = dst; - ptrdiff_t cs = data->column_strides/sizeof(@typ@); + typ *cp = dst; + ptrdiff_t cs = data->column_strides/sizeof(typ); for (j = 0; j < data->columns; ++j) { - *cp = @nan@; + *cp = numeric_limits<typ>::nan; cp += cs; } - dst += data->row_strides/sizeof(@typ@); + dst += data->row_strides/sizeof(typ); } } -static NPY_INLINE void -zero_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data) +template<typename typ> +static inline void +zero_matrix(typ *dst, const LINEARIZE_DATA_t* data) { - @typ@ *dst = (@typ@ *) dst_in; - int i, j; for (i = 0; i < data->rows; i++) { - @typ@ *cp = dst; - ptrdiff_t cs = data->column_strides/sizeof(@typ@); + typ *cp = dst; + ptrdiff_t cs = data->column_strides/sizeof(typ); for (j = 0; j < data->columns; ++j) { - *cp = @zero@; + *cp = numeric_limits<typ>::zero; cp += cs; } - dst += data->row_strides/sizeof(@typ@); + dst += data->row_strides/sizeof(typ); } } -/**end repeat**/ - /* identity square matrix generation */ -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t# - #cblas_type = s, d, c, z# - */ -static NPY_INLINE void -identity_@TYPE@_matrix(void *ptr, size_t n) +template<typename typ> +static inline void +identity_matrix(typ *matrix, size_t n) { size_t i; - @typ@ *matrix = (@typ@*) ptr; /* in IEEE floating point, zeroes are represented as bitwise 0 */ - memset(matrix, 0, n*n*sizeof(@typ@)); + memset(matrix, 0, n*n*sizeof(typ)); for (i = 0; i < n; ++i) { - *matrix = @cblas_type@_one; + *matrix = numeric_limits<typ>::one; matrix += n+1; } } -/**end repeat**/ /* lower/upper triangular matrix using blas (in place) */ -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #typ = float, double, COMPLEX_t, DOUBLECOMPLEX_t# - #cblas_type = s, d, c, z# - */ - -static NPY_INLINE void -triu_@TYPE@_matrix(void *ptr, size_t n) +template<typename typ> +static inline void +triu_matrix(typ *matrix, size_t n) { size_t i, j; - @typ@ *matrix = (@typ@*)ptr; matrix += n; for (i = 1; i < n; ++i) { for (j = 0; j < i; ++j) { - matrix[j] = @cblas_type@_zero; + matrix[j] = numeric_limits<typ>::zero; } matrix += n; } } -/**end repeat**/ /* -------------------------------------------------------------------------- */ /* Determinants */ -/**begin repeat - #TYPE = FLOAT, DOUBLE# - #typ = npy_float, npy_double# - #log_func = npy_logf, npy_log# - #exp_func = npy_expf, npy_exp# - #zero = 0.0f, 0.0# -*/ +static npy_float npylog(npy_float f) { return npy_logf(f);} +static npy_double npylog(npy_double d) { return npy_log(d);} +static npy_float npyexp(npy_float f) { return npy_expf(f);} +static npy_double npyexp(npy_double d) { return npy_exp(d);} -static NPY_INLINE void -@TYPE@_slogdet_from_factored_diagonal(@typ@* src, +template<typename typ> +static inline void +slogdet_from_factored_diagonal(typ* src, fortran_int m, - @typ@ *sign, - @typ@ *logdet) + typ *sign, + typ *logdet) { - @typ@ acc_sign = *sign; - @typ@ acc_logdet = @zero@; + typ acc_sign = *sign; + typ acc_logdet = numeric_limits<typ>::zero; int i; for (i = 0; i < m; i++) { - @typ@ abs_element = *src; - if (abs_element < @zero@) { + typ abs_element = *src; + if (abs_element < numeric_limits<typ>::zero) { acc_sign = -acc_sign; abs_element = -abs_element; } - acc_logdet += @log_func@(abs_element); + acc_logdet += npylog(abs_element); src += m+1; } @@ -979,32 +1029,26 @@ static NPY_INLINE void *logdet = acc_logdet; } -static NPY_INLINE @typ@ -@TYPE@_det_from_slogdet(@typ@ sign, @typ@ logdet) +template<typename typ> +static inline typ +det_from_slogdet(typ sign, typ logdet) { - @typ@ result = sign * @exp_func@(logdet); + typ result = sign * npyexp(logdet); return result; } -/**end repeat**/ +npy_float npyabs(npy_cfloat z) { return npy_cabsf(z);} +npy_double npyabs(npy_cdouble z) { return npy_cabs(z);} -/**begin repeat - #TYPE = CFLOAT, CDOUBLE# - #typ = npy_cfloat, npy_cdouble# - #basetyp = npy_float, npy_double# - #abs_func = npy_cabsf, npy_cabs# - #log_func = npy_logf, npy_log# - #exp_func = npy_expf, npy_exp# - #zero = 0.0f, 0.0# -*/ -#define RE(COMPLEX) (((@basetyp@*)(&COMPLEX))[0]) -#define IM(COMPLEX) (((@basetyp@*)(&COMPLEX))[1]) +#define RE(COMPLEX) (COMPLEX).real +#define IM(COMPLEX) (COMPLEX).imag -static NPY_INLINE @typ@ -@TYPE@_mult(@typ@ op1, @typ@ op2) +template<typename typ> +static inline typ +mult(typ op1, typ op2) { - @typ@ rv; + typ rv; RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2); IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2); @@ -1013,25 +1057,26 @@ static NPY_INLINE @typ@ } -static NPY_INLINE void -@TYPE@_slogdet_from_factored_diagonal(@typ@* src, +template<typename typ, typename basetyp> +static inline void +slogdet_from_factored_diagonal(typ* src, fortran_int m, - @typ@ *sign, - @basetyp@ *logdet) + typ *sign, + basetyp *logdet) { int i; - @typ@ sign_acc = *sign; - @basetyp@ logdet_acc = @zero@; + typ sign_acc = *sign; + basetyp logdet_acc = numeric_limits<basetyp>::zero; for (i = 0; i < m; i++) { - @basetyp@ abs_element = @abs_func@(*src); - @typ@ sign_element; + basetyp abs_element = npyabs(*src); + typ sign_element; RE(sign_element) = RE(*src) / abs_element; IM(sign_element) = IM(*src) / abs_element; - sign_acc = @TYPE@_mult(sign_acc, sign_element); - logdet_acc += @log_func@(abs_element); + sign_acc = mult(sign_acc, sign_element); + logdet_acc += npylog(abs_element); src += m + 1; } @@ -1039,17 +1084,17 @@ static NPY_INLINE void *logdet = logdet_acc; } -static NPY_INLINE @typ@ -@TYPE@_det_from_slogdet(@typ@ sign, @basetyp@ logdet) +template<typename typ, typename basetyp> +static inline typ +det_from_slogdet(typ sign, basetyp logdet) { - @typ@ tmp; - RE(tmp) = @exp_func@(logdet); - IM(tmp) = @zero@; - return @TYPE@_mult(sign, tmp); + typ tmp; + RE(tmp) = npyexp(logdet); + IM(tmp) = numeric_limits<basetyp>::zero; + return mult(sign, tmp); } #undef RE #undef IM -/**end repeat**/ /* As in the linalg package, the determinant is computed via LU factorization @@ -1057,26 +1102,20 @@ static NPY_INLINE @typ@ * slogdet computes sign + log(determinant). * det computes sign * exp(slogdet). */ -/**begin repeat - - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# - #basetyp = npy_float, npy_double, npy_float, npy_double# - #cblas_type = s, d, c, z# -*/ - -static NPY_INLINE void -@TYPE@_slogdet_single_element(fortran_int m, - void* src, +template<typename typ, typename basetyp> +static inline void +slogdet_single_element(fortran_int m, + typ* src, fortran_int* pivots, - @typ@ *sign, - @basetyp@ *logdet) + typ *sign, + basetyp *logdet) { +using ftyp = fortran_type_t<typ>; fortran_int info = 0; fortran_int lda = fortran_int_max(m, 1); int i; /* note: done in place */ - LAPACK(@cblas_type@getrf)(&m, &m, (void *)src, &lda, pivots, &info); + getrf(&m, &m, (ftyp*)src, &lda, pivots, &info); if (info == 0) { int change_sign = 0; @@ -1086,23 +1125,20 @@ static NPY_INLINE void change_sign += (pivots[i] != (i+1)); } - memcpy(sign, - (change_sign % 2)? - &@cblas_type@_minus_one : - &@cblas_type@_one - , sizeof(*sign)); - @TYPE@_slogdet_from_factored_diagonal(src, m, sign, logdet); + *sign = (change_sign % 2)?numeric_limits<typ>::minus_one:numeric_limits<typ>::one; + slogdet_from_factored_diagonal(src, m, sign, logdet); } else { /* if getrf fails, use 0 as sign and -inf as logdet */ - memcpy(sign, &@cblas_type@_zero, sizeof(*sign)); - memcpy(logdet, &@cblas_type@_ninf, sizeof(*logdet)); + *sign = numeric_limits<typ>::zero; + *logdet = numeric_limits<basetyp>::ninf; } } +template<typename typ, typename basetyp> static void -@TYPE@_slogdet(char **args, +slogdet(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) @@ -1123,7 +1159,7 @@ static void INIT_OUTER_LOOP_3 m = (fortran_int) dimensions[0]; safe_m = m; - matrix_size = safe_m * safe_m * sizeof(@typ@); + matrix_size = safe_m * safe_m * sizeof(typ); pivot_size = safe_m * sizeof(fortran_int); tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); @@ -1132,20 +1168,21 @@ static void /* swapped steps to get matrix in FORTRAN order */ init_linearize_data(&lin_data, m, m, steps[1], steps[0]); BEGIN_OUTER_LOOP_3 - linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); - @TYPE@_slogdet_single_element(m, - (void*)tmp_buff, + linearize_matrix((typ*)tmp_buff, (typ*)args[0], &lin_data); + slogdet_single_element(m, + (typ*)tmp_buff, (fortran_int*)(tmp_buff+matrix_size), - (@typ@*)args[1], - (@basetyp@*)args[2]); + (typ*)args[1], + (basetyp*)args[2]); END_OUTER_LOOP free(tmp_buff); } } +template<typename typ, typename basetyp> static void -@TYPE@_det(char **args, +det(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) @@ -1166,42 +1203,42 @@ static void INIT_OUTER_LOOP_2 m = (fortran_int) dimensions[0]; safe_m = m; - matrix_size = safe_m * safe_m * sizeof(@typ@); + matrix_size = safe_m * safe_m * sizeof(typ); pivot_size = safe_m * sizeof(fortran_int); tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size); if (tmp_buff) { LINEARIZE_DATA_t lin_data; - @typ@ sign; - @basetyp@ logdet; + typ sign; + basetyp logdet; /* swapped steps to get matrix in FORTRAN order */ init_linearize_data(&lin_data, m, m, steps[1], steps[0]); BEGIN_OUTER_LOOP_2 - linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data); - @TYPE@_slogdet_single_element(m, - (void*)tmp_buff, + linearize_matrix((typ*)tmp_buff, (typ*)args[0], &lin_data); + slogdet_single_element(m, + (typ*)tmp_buff, (fortran_int*)(tmp_buff + matrix_size), &sign, &logdet); - *(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet); + *(typ *)args[1] = det_from_slogdet(sign, logdet); END_OUTER_LOOP free(tmp_buff); } } -/**end repeat**/ /* -------------------------------------------------------------------------- */ /* Eigh family */ -typedef struct eigh_params_struct { - void *A; /* matrix */ - void *W; /* eigenvalue vector */ - void *WORK; /* main work buffer */ - void *RWORK; /* secondary work buffer (for complex versions) */ - void *IWORK; +template<typename typ> +struct EIGH_PARAMS_t { + typ *A; /* matrix */ + basetype_t<typ> *W; /* eigenvalue vector */ + typ *WORK; /* main work buffer */ + basetype_t<typ> *RWORK; /* secondary work buffer (for complex versions) */ + fortran_int *IWORK; fortran_int N; fortran_int LWORK; fortran_int LRWORK; @@ -1209,20 +1246,24 @@ typedef struct eigh_params_struct { char JOBZ; char UPLO; fortran_int LDA; -} EIGH_PARAMS_t; +} ; -/**begin repeat - #TYPE = FLOAT, DOUBLE# - #typ = npy_float, npy_double# - #ftyp = fortran_real, fortran_doublereal# - #lapack_func = ssyevd, dsyevd# -*/ - -static NPY_INLINE fortran_int -call_@lapack_func@(EIGH_PARAMS_t *params) +static inline fortran_int +call_evd(EIGH_PARAMS_t<npy_float> *params) +{ + fortran_int rv; + LAPACK(ssyevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, + params->A, ¶ms->LDA, params->W, + params->WORK, ¶ms->LWORK, + params->IWORK, ¶ms->LIWORK, + &rv); + return rv; +} +static inline fortran_int +call_evd(EIGH_PARAMS_t<npy_double> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, + LAPACK(dsyevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, params->A, ¶ms->LDA, params->W, params->WORK, ¶ms->LWORK, params->IWORK, ¶ms->LIWORK, @@ -1230,13 +1271,15 @@ call_@lapack_func@(EIGH_PARAMS_t *params) return rv; } + /* * Initialize the parameters to use in for the lapack function _syevd * Handles buffer allocation */ -static NPY_INLINE int -init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, - fortran_int N) +template<typename typ> +static inline int +init_evd(EIGH_PARAMS_t<typ>* params, char JOBZ, char UPLO, + fortran_int N, scalar_trait) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; @@ -1244,19 +1287,19 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, fortran_int liwork; npy_uint8 *a, *w, *work, *iwork; size_t safe_N = N; - size_t alloc_size = safe_N * (safe_N + 1) * sizeof(@typ@); + size_t alloc_size = safe_N * (safe_N + 1) * sizeof(typ); fortran_int lda = fortran_int_max(N, 1); - mem_buff = malloc(alloc_size); + mem_buff = (npy_uint8 *)malloc(alloc_size); if (!mem_buff) { goto error; } a = mem_buff; - w = mem_buff + safe_N * safe_N * sizeof(@typ@); + w = mem_buff + safe_N * safe_N * sizeof(typ); - params->A = a; - params->W = w; + params->A = (typ*)a; + params->W = (typ*)w; params->RWORK = NULL; /* unused */ params->N = N; params->LRWORK = 0; /* unused */ @@ -1266,7 +1309,7 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, /* Work size query */ { - @typ@ query_work_size; + typ query_work_size; fortran_int query_iwork_size; params->LWORK = -1; @@ -1274,7 +1317,7 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, params->WORK = &query_work_size; params->IWORK = &query_iwork_size; - if (call_@lapack_func@(params) != 0) { + if (call_evd(params) != 0) { goto error; } @@ -1282,18 +1325,18 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, liwork = query_iwork_size; } - mem_buff2 = malloc(lwork*sizeof(@typ@) + liwork*sizeof(fortran_int)); + mem_buff2 = (npy_uint8 *)malloc(lwork*sizeof(typ) + liwork*sizeof(fortran_int)); if (!mem_buff2) { goto error; } work = mem_buff2; - iwork = mem_buff2 + lwork*sizeof(@typ@); + iwork = mem_buff2 + lwork*sizeof(typ); params->LWORK = lwork; - params->WORK = work; + params->WORK = (typ*)work; params->LIWORK = liwork; - params->IWORK = iwork; + params->IWORK = (fortran_int*)iwork; return 1; @@ -1305,40 +1348,44 @@ init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO, return 0; } -/**end repeat**/ -/**begin repeat - #TYPE = CFLOAT, CDOUBLE# - #typ = npy_cfloat, npy_cdouble# - #basetyp = npy_float, npy_double# - #ftyp = fortran_complex, fortran_doublecomplex# - #fbasetyp = fortran_real, fortran_doublereal# - #lapack_func = cheevd, zheevd# -*/ -static NPY_INLINE fortran_int -call_@lapack_func@(EIGH_PARAMS_t *params) +static inline fortran_int +call_evd(EIGH_PARAMS_t<npy_cfloat> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, - params->A, ¶ms->LDA, params->W, - params->WORK, ¶ms->LWORK, + LAPACK(cheevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, + (fortran_type_t<npy_cfloat>*)params->A, ¶ms->LDA, params->W, + (fortran_type_t<npy_cfloat>*)params->WORK, ¶ms->LWORK, params->RWORK, ¶ms->LRWORK, params->IWORK, ¶ms->LIWORK, &rv); return rv; } -/* - * Initialize the parameters to use in for the lapack function _heev - * Handles buffer allocation - */ -static NPY_INLINE int -init_@lapack_func@(EIGH_PARAMS_t *params, +static inline fortran_int +call_evd(EIGH_PARAMS_t<npy_cdouble> *params) +{ + fortran_int rv; + LAPACK(zheevd)(¶ms->JOBZ, ¶ms->UPLO, ¶ms->N, + (fortran_type_t<npy_cdouble>*)params->A, ¶ms->LDA, params->W, + (fortran_type_t<npy_cdouble>*)params->WORK, ¶ms->LWORK, + params->RWORK, ¶ms->LRWORK, + params->IWORK, ¶ms->LIWORK, + &rv); + return rv; +} + +template<typename typ> +static inline int +init_evd(EIGH_PARAMS_t<typ> *params, char JOBZ, char UPLO, - fortran_int N) + fortran_int N, complex_trait) { + using basetyp = basetype_t<typ>; +using ftyp = fortran_type_t<typ>; +using fbasetyp = fortran_type_t<basetyp>; npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; fortran_int lwork; @@ -1348,16 +1395,16 @@ init_@lapack_func@(EIGH_PARAMS_t *params, size_t safe_N = N; fortran_int lda = fortran_int_max(N, 1); - mem_buff = malloc(safe_N * safe_N * sizeof(@typ@) + - safe_N * sizeof(@basetyp@)); + mem_buff = (npy_uint8 *)malloc(safe_N * safe_N * sizeof(typ) + + safe_N * sizeof(basetyp)); if (!mem_buff) { goto error; } a = mem_buff; - w = mem_buff + safe_N * safe_N * sizeof(@typ@); + w = mem_buff + safe_N * safe_N * sizeof(typ); - params->A = a; - params->W = w; + params->A = (typ*)a; + params->W = (basetyp*)w; params->N = N; params->JOBZ = JOBZ; params->UPLO = UPLO; @@ -1365,40 +1412,40 @@ init_@lapack_func@(EIGH_PARAMS_t *params, /* Work size query */ { - @ftyp@ query_work_size; - @fbasetyp@ query_rwork_size; + ftyp query_work_size; + fbasetyp query_rwork_size; fortran_int query_iwork_size; params->LWORK = -1; params->LRWORK = -1; params->LIWORK = -1; - params->WORK = &query_work_size; - params->RWORK = &query_rwork_size; + params->WORK = (typ*)&query_work_size; + params->RWORK = (basetyp*)&query_rwork_size; params->IWORK = &query_iwork_size; - if (call_@lapack_func@(params) != 0) { + if (call_evd(params) != 0) { goto error; } - lwork = (fortran_int)*(@fbasetyp@*)&query_work_size; + lwork = (fortran_int)*(fbasetyp*)&query_work_size; lrwork = (fortran_int)query_rwork_size; liwork = query_iwork_size; } - mem_buff2 = malloc(lwork*sizeof(@typ@) + - lrwork*sizeof(@basetyp@) + + mem_buff2 = (npy_uint8 *)malloc(lwork*sizeof(typ) + + lrwork*sizeof(basetyp) + liwork*sizeof(fortran_int)); if (!mem_buff2) { goto error; } work = mem_buff2; - rwork = work + lwork*sizeof(@typ@); - iwork = rwork + lrwork*sizeof(@basetyp@); + rwork = work + lwork*sizeof(typ); + iwork = rwork + lrwork*sizeof(basetyp); - params->WORK = work; - params->RWORK = rwork; - params->IWORK = iwork; + params->WORK = (typ*)work; + params->RWORK = (basetyp*)rwork; + params->IWORK = (fortran_int*)iwork; params->LWORK = lwork; params->LRWORK = lrwork; params->LIWORK = liwork; @@ -1413,16 +1460,7 @@ error: return 0; } -/**end repeat**/ - -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #BASETYPE = FLOAT, DOUBLE, FLOAT, DOUBLE# - #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# - #basetyp = npy_float, npy_double, npy_float, npy_double# - #lapack_func = ssyevd, dsyevd, cheevd, zheevd# -**/ /* * (M, M)->(M,)(M, M) * dimensions[1] -> M @@ -1431,8 +1469,9 @@ error: * args[2] -> A[out] */ -static NPY_INLINE void -release_@lapack_func@(EIGH_PARAMS_t *params) +template<typename typ> +static inline void +release_evd(EIGH_PARAMS_t<typ> *params) { /* allocated memory in A and WORK */ free(params->A); @@ -1441,18 +1480,20 @@ release_@lapack_func@(EIGH_PARAMS_t *params) } -static NPY_INLINE void -@TYPE@_eigh_wrapper(char JOBZ, +template<typename typ> +static inline void +eigh_wrapper(char JOBZ, char UPLO, char**args, npy_intp const *dimensions, npy_intp const *steps) { + using basetyp = basetype_t<typ>; ptrdiff_t outer_steps[3]; size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:3; - EIGH_PARAMS_t eigh_params; + EIGH_PARAMS_t<typ> eigh_params; int error_occurred = get_fp_invalid_and_clear(); for (iter = 0; iter < op_count; ++iter) { @@ -1460,10 +1501,10 @@ static NPY_INLINE void } steps += op_count; - if (init_@lapack_func@(&eigh_params, + if (init_evd(&eigh_params, JOBZ, UPLO, - (fortran_int)dimensions[0])) { + (fortran_int)dimensions[0], dispatch_scalar<typ>())) { LINEARIZE_DATA_t matrix_in_ld; LINEARIZE_DATA_t eigenvectors_out_ld; LINEARIZE_DATA_t eigenvalues_out_ld; @@ -1483,106 +1524,98 @@ static NPY_INLINE void for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ - linearize_@TYPE@_matrix(eigh_params.A, args[0], &matrix_in_ld); - not_ok = call_@lapack_func@(&eigh_params); + linearize_matrix((typ*)eigh_params.A, (typ*)args[0], &matrix_in_ld); + not_ok = call_evd(&eigh_params); if (!not_ok) { /* lapack ok, copy result out */ - delinearize_@BASETYPE@_matrix(args[1], - eigh_params.W, + delinearize_matrix((basetyp*)args[1], + (basetyp*)eigh_params.W, &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { - delinearize_@TYPE@_matrix(args[2], - eigh_params.A, + delinearize_matrix((typ*)args[2], + (typ*)eigh_params.A, &eigenvectors_out_ld); } } else { /* lapack fail, set result to nan */ error_occurred = 1; - nan_@BASETYPE@_matrix(args[1], &eigenvalues_out_ld); + nan_matrix((basetyp*)args[1], &eigenvalues_out_ld); if ('V' == eigh_params.JOBZ) { - nan_@TYPE@_matrix(args[2], &eigenvectors_out_ld); + nan_matrix((typ*)args[2], &eigenvectors_out_ld); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } - release_@lapack_func@(&eigh_params); + release_evd(&eigh_params); } set_fp_invalid_or_clear(error_occurred); } -/**end repeat**/ -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - */ +template<typename typ> static void -@TYPE@_eighlo(char **args, +eighlo(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - @TYPE@_eigh_wrapper('V', 'L', args, dimensions, steps); + eigh_wrapper<typ>('V', 'L', args, dimensions, steps); } +template<typename typ> static void -@TYPE@_eighup(char **args, +eighup(char **args, npy_intp const *dimensions, npy_intp const *steps, void* NPY_UNUSED(func)) { - @TYPE@_eigh_wrapper('V', 'U', args, dimensions, steps); + eigh_wrapper<typ>('V', 'U', args, dimensions, steps); } +template<typename typ> static void -@TYPE@_eigvalshlo(char **args, +eigvalshlo(char **args, npy_intp const *dimensions, npy_intp const *steps, void* NPY_UNUSED(func)) { - @TYPE@_eigh_wrapper('N', 'L', args, dimensions, steps); + eigh_wrapper<typ>('N', 'L', args, dimensions, steps); } +template<typename typ> static void -@TYPE@_eigvalshup(char **args, +eigvalshup(char **args, npy_intp const *dimensions, npy_intp const *steps, void* NPY_UNUSED(func)) { - @TYPE@_eigh_wrapper('N', 'U', args, dimensions, steps); + eigh_wrapper<typ>('N', 'U', args, dimensions, steps); } -/**end repeat**/ /* -------------------------------------------------------------------------- */ /* Solve family (includes inv) */ -typedef struct gesv_params_struct +template<typename typ> +struct GESV_PARAMS_t { - void *A; /* A is (N, N) of base type */ - void *B; /* B is (N, NRHS) of base type */ + typ *A; /* A is (N, N) of base type */ + typ *B; /* B is (N, NRHS) of base type */ fortran_int * IPIV; /* IPIV is (N) */ fortran_int N; fortran_int NRHS; fortran_int LDA; fortran_int LDB; -} GESV_PARAMS_t; - -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# - #ftyp = fortran_real, fortran_doublereal, - fortran_complex, fortran_doublecomplex# - #lapack_func = sgesv, dgesv, cgesv, zgesv# -*/ +}; -static NPY_INLINE fortran_int -call_@lapack_func@(GESV_PARAMS_t *params) +static inline fortran_int +call_gesv(GESV_PARAMS_t<fortran_real> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->N, ¶ms->NRHS, + LAPACK(sgesv)(¶ms->N, ¶ms->NRHS, params->A, ¶ms->LDA, params->IPIV, params->B, ¶ms->LDB, @@ -1590,30 +1623,68 @@ call_@lapack_func@(GESV_PARAMS_t *params) return rv; } +static inline fortran_int +call_gesv(GESV_PARAMS_t<fortran_doublereal> *params) +{ + fortran_int rv; + LAPACK(dgesv)(¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->IPIV, + params->B, ¶ms->LDB, + &rv); + return rv; +} + +static inline fortran_int +call_gesv(GESV_PARAMS_t<fortran_complex> *params) +{ + fortran_int rv; + LAPACK(cgesv)(¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->IPIV, + params->B, ¶ms->LDB, + &rv); + return rv; +} + +static inline fortran_int +call_gesv(GESV_PARAMS_t<fortran_doublecomplex> *params) +{ + fortran_int rv; + LAPACK(zgesv)(¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->IPIV, + params->B, ¶ms->LDB, + &rv); + return rv; +} + + /* * Initialize the parameters to use in for the lapack function _heev * Handles buffer allocation */ -static NPY_INLINE int -init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) +template<typename ftyp> +static inline int +init_gesv(GESV_PARAMS_t<ftyp> *params, fortran_int N, fortran_int NRHS) { npy_uint8 *mem_buff = NULL; npy_uint8 *a, *b, *ipiv; size_t safe_N = N; size_t safe_NRHS = NRHS; fortran_int ld = fortran_int_max(N, 1); - mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@) + - safe_N * safe_NRHS*sizeof(@ftyp@) + + mem_buff = (npy_uint8 *)malloc(safe_N * safe_N * sizeof(ftyp) + + safe_N * safe_NRHS*sizeof(ftyp) + safe_N * sizeof(fortran_int)); if (!mem_buff) { goto error; } a = mem_buff; - b = a + safe_N * safe_N * sizeof(@ftyp@); - ipiv = b + safe_N * safe_NRHS * sizeof(@ftyp@); + b = a + safe_N * safe_N * sizeof(ftyp); + ipiv = b + safe_N * safe_NRHS * sizeof(ftyp); - params->A = a; - params->B = b; + params->A = (ftyp*)a; + params->B = (ftyp*)b; params->IPIV = (fortran_int*)ipiv; params->N = N; params->NRHS = NRHS; @@ -1628,26 +1699,29 @@ init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS) return 0; } -static NPY_INLINE void -release_@lapack_func@(GESV_PARAMS_t *params) +template<typename ftyp> +static inline void +release_gesv(GESV_PARAMS_t<ftyp> *params) { /* memory block base is in A */ free(params->A); memset(params, 0, sizeof(*params)); } +template<typename typ> static void -@TYPE@_solve(char **args, npy_intp const *dimensions, npy_intp const *steps, +solve(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - GESV_PARAMS_t params; +using ftyp = fortran_type_t<typ>; + GESV_PARAMS_t<ftyp> params; fortran_int n, nrhs; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; nrhs = (fortran_int)dimensions[1]; - if (init_@lapack_func@(¶ms, n, nrhs)) { + if (init_gesv(¶ms, n, nrhs)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); @@ -1656,34 +1730,37 @@ static void BEGIN_OUTER_LOOP_3 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - linearize_@TYPE@_matrix(params.B, args[1], &b_in); - not_ok =call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + linearize_matrix((typ*)params.B, (typ*)args[1], &b_in); + not_ok =call_gesv(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[2], params.B, &r_out); + delinearize_matrix((typ*)args[2], (typ*)params.B, &r_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[2], &r_out); + nan_matrix((typ*)args[2], &r_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_gesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } + +template<typename typ> static void -@TYPE@_solve1(char **args, npy_intp const *dimensions, npy_intp const *steps, +solve1(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - GESV_PARAMS_t params; +using ftyp = fortran_type_t<typ>; + GESV_PARAMS_t<ftyp> params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_3 n = (fortran_int)dimensions[0]; - if (init_@lapack_func@(¶ms, n, 1)) { + if (init_gesv(¶ms, n, 1)) { LINEARIZE_DATA_t a_in, b_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&b_in, 1, n, 1, steps[2]); @@ -1691,105 +1768,130 @@ static void BEGIN_OUTER_LOOP_3 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - linearize_@TYPE@_matrix(params.B, args[1], &b_in); - not_ok = call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + linearize_matrix((typ*)params.B, (typ*)args[1], &b_in); + not_ok = call_gesv(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[2], params.B, &r_out); + delinearize_matrix((typ*)args[2], (typ*)params.B, &r_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[2], &r_out); + nan_matrix((typ*)args[2], &r_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_gesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } +template<typename typ> static void -@TYPE@_inv(char **args, npy_intp const *dimensions, npy_intp const *steps, +inv(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - GESV_PARAMS_t params; +using ftyp = fortran_type_t<typ>; + GESV_PARAMS_t<ftyp> params; fortran_int n; int error_occurred = get_fp_invalid_and_clear(); INIT_OUTER_LOOP_2 n = (fortran_int)dimensions[0]; - if (init_@lapack_func@(¶ms, n, n)) { + if (init_gesv(¶ms, n, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - identity_@TYPE@_matrix(params.B, n); - not_ok = call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + identity_matrix((typ*)params.B, n); + not_ok = call_gesv(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[1], params.B, &r_out); + delinearize_matrix((typ*)args[1], (typ*)params.B, &r_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[1], &r_out); + nan_matrix((typ*)args[1], &r_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_gesv(¶ms); } set_fp_invalid_or_clear(error_occurred); } -/**end repeat**/ - /* -------------------------------------------------------------------------- */ /* Cholesky decomposition */ -typedef struct potr_params_struct +template<typename typ> +struct POTR_PARAMS_t { - void *A; + typ *A; fortran_int N; fortran_int LDA; char UPLO; -} POTR_PARAMS_t; +}; -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #ftyp = fortran_real, fortran_doublereal, - fortran_complex, fortran_doublecomplex# - #lapack_func = spotrf, dpotrf, cpotrf, zpotrf# - */ +static inline fortran_int +call_potrf(POTR_PARAMS_t<fortran_real> *params) +{ + fortran_int rv; + LAPACK(spotrf)(¶ms->UPLO, + ¶ms->N, params->A, ¶ms->LDA, + &rv); + return rv; +} + +static inline fortran_int +call_potrf(POTR_PARAMS_t<fortran_doublereal> *params) +{ + fortran_int rv; + LAPACK(dpotrf)(¶ms->UPLO, + ¶ms->N, params->A, ¶ms->LDA, + &rv); + return rv; +} + +static inline fortran_int +call_potrf(POTR_PARAMS_t<fortran_complex> *params) +{ + fortran_int rv; + LAPACK(cpotrf)(¶ms->UPLO, + ¶ms->N, params->A, ¶ms->LDA, + &rv); + return rv; +} -static NPY_INLINE fortran_int -call_@lapack_func@(POTR_PARAMS_t *params) +static inline fortran_int +call_potrf(POTR_PARAMS_t<fortran_doublecomplex> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->UPLO, + LAPACK(zpotrf)(¶ms->UPLO, ¶ms->N, params->A, ¶ms->LDA, &rv); return rv; } -static NPY_INLINE int -init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N) +template<typename ftyp> +static inline int +init_potrf(POTR_PARAMS_t<ftyp> *params, char UPLO, fortran_int N) { npy_uint8 *mem_buff = NULL; npy_uint8 *a; size_t safe_N = N; fortran_int lda = fortran_int_max(N, 1); - mem_buff = malloc(safe_N * safe_N * sizeof(@ftyp@)); + mem_buff = (npy_uint8 *)malloc(safe_N * safe_N * sizeof(ftyp)); if (!mem_buff) { goto error; } a = mem_buff; - params->A = a; + params->A = (ftyp*)a; params->N = N; params->LDA = lda; params->UPLO = UPLO; @@ -1802,18 +1904,21 @@ init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N) return 0; } -static NPY_INLINE void -release_@lapack_func@(POTR_PARAMS_t *params) +template<typename ftyp> +static inline void +release_potrf(POTR_PARAMS_t<ftyp> *params) { /* memory block base in A */ free(params->A); memset(params, 0, sizeof(*params)); } +template<typename typ> static void -@TYPE@_cholesky(char uplo, char **args, npy_intp const *dimensions, npy_intp const *steps) +cholesky(char uplo, char **args, npy_intp const *dimensions, npy_intp const *steps) { - POTR_PARAMS_t params; + using ftyp = fortran_type_t<typ>; + POTR_PARAMS_t<ftyp> params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n; INIT_OUTER_LOOP_2 @@ -1821,50 +1926,50 @@ static void assert(uplo == 'L'); n = (fortran_int)dimensions[0]; - if (init_@lapack_func@(¶ms, uplo, n)) { + if (init_potrf(¶ms, uplo, n)) { LINEARIZE_DATA_t a_in, r_out; init_linearize_data(&a_in, n, n, steps[1], steps[0]); init_linearize_data(&r_out, n, n, steps[3], steps[2]); BEGIN_OUTER_LOOP_2 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - not_ok = call_@lapack_func@(¶ms); + linearize_matrix(params.A, (ftyp*)args[0], &a_in); + not_ok = call_potrf(¶ms); if (!not_ok) { - triu_@TYPE@_matrix(params.A, params.N); - delinearize_@TYPE@_matrix(args[1], params.A, &r_out); + triu_matrix(params.A, params.N); + delinearize_matrix((ftyp*)args[1], params.A, &r_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[1], &r_out); + nan_matrix((ftyp*)args[1], &r_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_potrf(¶ms); } set_fp_invalid_or_clear(error_occurred); } +template<typename typ> static void -@TYPE@_cholesky_lo(char **args, npy_intp const *dimensions, npy_intp const *steps, +cholesky_lo(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - @TYPE@_cholesky('L', args, dimensions, steps); + cholesky<typ>('L', args, dimensions, steps); } -/**end repeat**/ - /* -------------------------------------------------------------------------- */ /* eig family */ -typedef struct geev_params_struct { - void *A; - void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/ - void *WI; - void *VLR; /* REAL VL buffers for _geev where _ is s, d */ - void *VRR; /* REAL VR buffers for _geev where _ is s, d */ - void *WORK; - void *W; /* final w */ - void *VL; /* final vl */ - void *VR; /* final vr */ +template<typename typ> +struct GEEV_PARAMS_t { + typ *A; + basetype_t<typ> *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/ + typ *WI; + typ *VLR; /* REAL VL buffers for _geev where _ is s, d */ + typ *VRR; /* REAL VR buffers for _geev where _ is s, d */ + typ *WORK; + typ *W; /* final w */ + typ *VL; /* final vl */ + typ *VR; /* final vr */ fortran_int N; fortran_int LDA; @@ -1874,10 +1979,11 @@ typedef struct geev_params_struct { char JOBVL; char JOBVR; -} GEEV_PARAMS_t; +}; -static NPY_INLINE void -dump_geev_params(const char *name, GEEV_PARAMS_t* params) +template<typename typ> +static inline void +dump_geev_params(const char *name, GEEV_PARAMS_t<typ>* params) { TRACE_TXT("\n%s\n" @@ -1922,20 +2028,25 @@ dump_geev_params(const char *name, GEEV_PARAMS_t* params) "JOBVR", params->JOBVR); } -/**begin repeat - #TYPE = FLOAT, DOUBLE# - #CTYPE = CFLOAT, CDOUBLE# - #typ = float, double# - #complextyp = COMPLEX_t, DOUBLECOMPLEX_t# - #lapack_func = sgeev, dgeev# - #zero = 0.0f, 0.0# -*/ +static inline fortran_int +call_geev(GEEV_PARAMS_t<float>* params) +{ + fortran_int rv; + LAPACK(sgeev)(¶ms->JOBVL, ¶ms->JOBVR, + ¶ms->N, params->A, ¶ms->LDA, + params->WR, params->WI, + params->VLR, ¶ms->LDVL, + params->VRR, ¶ms->LDVR, + params->WORK, ¶ms->LWORK, + &rv); + return rv; +} -static NPY_INLINE fortran_int -call_@lapack_func@(GEEV_PARAMS_t* params) +static inline fortran_int +call_geev(GEEV_PARAMS_t<double>* params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, + LAPACK(dgeev)(¶ms->JOBVL, ¶ms->JOBVR, ¶ms->N, params->A, ¶ms->LDA, params->WR, params->WI, params->VLR, ¶ms->LDVL, @@ -1945,18 +2056,21 @@ call_@lapack_func@(GEEV_PARAMS_t* params) return rv; } -static NPY_INLINE int -init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) + +template<typename typ> +static inline int +init_geev(GEEV_PARAMS_t<typ> *params, char jobvl, char jobvr, fortran_int n, +scalar_trait) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr; size_t safe_n = n; - size_t a_size = safe_n * safe_n * sizeof(@typ@); - size_t wr_size = safe_n * sizeof(@typ@); - size_t wi_size = safe_n * sizeof(@typ@); - size_t vlr_size = jobvl=='V' ? safe_n * safe_n * sizeof(@typ@) : 0; - size_t vrr_size = jobvr=='V' ? safe_n * safe_n * sizeof(@typ@) : 0; + size_t a_size = safe_n * safe_n * sizeof(typ); + size_t wr_size = safe_n * sizeof(typ); + size_t wi_size = safe_n * sizeof(typ); + size_t vlr_size = jobvl=='V' ? safe_n * safe_n * sizeof(typ) : 0; + size_t vrr_size = jobvr=='V' ? safe_n * safe_n * sizeof(typ) : 0; size_t w_size = wr_size*2; size_t vl_size = vlr_size*2; size_t vr_size = vrr_size*2; @@ -1964,7 +2078,7 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) fortran_int ld = fortran_int_max(n, 1); /* allocate data for known sizes (all but work) */ - mem_buff = malloc(a_size + wr_size + wi_size + + mem_buff = (npy_uint8 *)malloc(a_size + wr_size + wi_size + vlr_size + vrr_size + w_size + vl_size + vr_size); if (!mem_buff) { @@ -1980,14 +2094,14 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) vl = w + w_size; vr = vl + vl_size; - params->A = a; - params->WR = wr; - params->WI = wi; - params->VLR = vlr; - params->VRR = vrr; - params->W = w; - params->VL = vl; - params->VR = vr; + params->A = (typ*)a; + params->WR = (typ*)wr; + params->WI = (typ*)wi; + params->VLR = (typ*)vlr; + params->VRR = (typ*)vrr; + params->W = (typ*)w; + params->VL = (typ*)vl; + params->VR = (typ*)vr; params->N = n; params->LDA = ld; params->LDVL = ld; @@ -1997,26 +2111,26 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) /* Work size query */ { - @typ@ work_size_query; + typ work_size_query; params->LWORK = -1; params->WORK = &work_size_query; - if (call_@lapack_func@(params) != 0) { + if (call_geev(params) != 0) { goto error; } work_count = (size_t)work_size_query; } - mem_buff2 = malloc(work_count*sizeof(@typ@)); + mem_buff2 = (npy_uint8 *)malloc(work_count*sizeof(typ)); if (!mem_buff2) { goto error; } work = mem_buff2; params->LWORK = (fortran_int)work_count; - params->WORK = work; + params->WORK = (typ*)work; return 1; error: @@ -2027,42 +2141,45 @@ init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n) return 0; } -static NPY_INLINE void -mk_@TYPE@_complex_array_from_real(@complextyp@ *c, const @typ@ *re, size_t n) +template<typename complextyp, typename typ> +static inline void +mk_complex_array_from_real(complextyp *c, const typ *re, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { - c[iter].array[0] = re[iter]; - c[iter].array[1] = @zero@; + c[iter].r = re[iter]; + c[iter].i = numeric_limits<typ>::zero; } } -static NPY_INLINE void -mk_@TYPE@_complex_array(@complextyp@ *c, - const @typ@ *re, - const @typ@ *im, +template<typename complextyp, typename typ> +static inline void +mk_complex_array(complextyp *c, + const typ *re, + const typ *im, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { - c[iter].array[0] = re[iter]; - c[iter].array[1] = im[iter]; + c[iter].r = re[iter]; + c[iter].i = im[iter]; } } -static NPY_INLINE void -mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c, - const @typ@ *r, +template<typename complextyp, typename typ> +static inline void +mk_complex_array_conjugate_pair(complextyp *c, + const typ *r, size_t n) { size_t iter; for (iter = 0; iter < n; ++iter) { - @typ@ re = r[iter]; - @typ@ im = r[iter+n]; - c[iter].array[0] = re; - c[iter].array[1] = im; - c[iter+n].array[0] = re; - c[iter+n].array[1] = -im; + typ re = r[iter]; + typ im = r[iter+n]; + c[iter].r = re; + c[iter].i = im; + c[iter+n].r = re; + c[iter+n].i = -im; } } @@ -2073,24 +2190,25 @@ mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c, * i is the eigenvalue imaginary part produced by sgeev/zgeev * n is so that the order of the matrix is n by n */ -static NPY_INLINE void -mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c, - const @typ@ *r, - const @typ@ *i, +template<typename complextyp, typename typ> +static inline void +mk_geev_complex_eigenvectors(complextyp *c, + const typ *r, + const typ *i, size_t n) { size_t iter = 0; while (iter < n) { - if (i[iter] == @zero@) { + if (i[iter] == numeric_limits<typ>::zero) { /* eigenvalue was real, eigenvectors as well... */ - mk_@TYPE@_complex_array_from_real(c, r, n); + mk_complex_array_from_real(c, r, n); c += n; r += n; iter ++; } else { /* eigenvalue was complex, generate a pair of eigenvectors */ - mk_@TYPE@_complex_array_conjugate_pair(c, r, n); + mk_complex_array_conjugate_pair(c, r, n); c += 2*n; r += 2*n; iter += 2; @@ -2099,43 +2217,49 @@ mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c, } -static NPY_INLINE void -process_@lapack_func@_results(GEEV_PARAMS_t *params) +template<typename complextyp, typename typ> +static inline void +process_geev_results(GEEV_PARAMS_t<typ> *params, scalar_trait) { /* REAL versions of geev need the results to be translated * into complex versions. This is the way to deal with imaginary * results. In our gufuncs we will always return complex arrays! */ - mk_@TYPE@_complex_array(params->W, params->WR, params->WI, params->N); + mk_complex_array((complextyp*)params->W, (typ*)params->WR, (typ*)params->WI, params->N); /* handle the eigenvectors */ if ('V' == params->JOBVL) { - mk_@lapack_func@_complex_eigenvectors(params->VL, params->VLR, - params->WI, params->N); + mk_geev_complex_eigenvectors((complextyp*)params->VL, (typ*)params->VLR, + (typ*)params->WI, params->N); } if ('V' == params->JOBVR) { - mk_@lapack_func@_complex_eigenvectors(params->VR, params->VRR, - params->WI, params->N); + mk_geev_complex_eigenvectors((complextyp*)params->VR, (typ*)params->VRR, + (typ*)params->WI, params->N); } } -/**end repeat**/ +static inline fortran_int +call_geev(GEEV_PARAMS_t<fortran_complex>* params) +{ + fortran_int rv; -/**begin repeat - #TYPE = CFLOAT, CDOUBLE# - #typ = COMPLEX_t, DOUBLECOMPLEX_t# - #ftyp = fortran_complex, fortran_doublecomplex# - #realtyp = float, double# - #lapack_func = cgeev, zgeev# - */ - -static NPY_INLINE fortran_int -call_@lapack_func@(GEEV_PARAMS_t* params) + LAPACK(cgeev)(¶ms->JOBVL, ¶ms->JOBVR, + ¶ms->N, params->A, ¶ms->LDA, + params->W, + params->VL, ¶ms->LDVL, + params->VR, ¶ms->LDVR, + params->WORK, ¶ms->LWORK, + params->WR, /* actually RWORK */ + &rv); + return rv; +} +static inline fortran_int +call_geev(GEEV_PARAMS_t<fortran_doublecomplex>* params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->JOBVL, ¶ms->JOBVR, + LAPACK(zgeev)(¶ms->JOBVL, ¶ms->JOBVR, ¶ms->N, params->A, ¶ms->LDA, params->W, params->VL, ¶ms->LDVL, @@ -2146,26 +2270,28 @@ call_@lapack_func@(GEEV_PARAMS_t* params) return rv; } -static NPY_INLINE int -init_@lapack_func@(GEEV_PARAMS_t* params, +template<typename ftyp> +static inline int +init_geev(GEEV_PARAMS_t<ftyp>* params, char jobvl, char jobvr, - fortran_int n) + fortran_int n, complex_trait) { +using realtyp = basetype_t<ftyp>; npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *w, *vl, *vr, *work, *rwork; size_t safe_n = n; - size_t a_size = safe_n * safe_n * sizeof(@ftyp@); - size_t w_size = safe_n * sizeof(@ftyp@); - size_t vl_size = jobvl=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0; - size_t vr_size = jobvr=='V'? safe_n * safe_n * sizeof(@ftyp@) : 0; - size_t rwork_size = 2 * safe_n * sizeof(@realtyp@); + size_t a_size = safe_n * safe_n * sizeof(ftyp); + size_t w_size = safe_n * sizeof(ftyp); + size_t vl_size = jobvl=='V'? safe_n * safe_n * sizeof(ftyp) : 0; + size_t vr_size = jobvr=='V'? safe_n * safe_n * sizeof(ftyp) : 0; + size_t rwork_size = 2 * safe_n * sizeof(realtyp); size_t work_count = 0; size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; fortran_int ld = fortran_int_max(n, 1); - mem_buff = malloc(total_size); + mem_buff = (npy_uint8 *)malloc(total_size); if (!mem_buff) { goto error; } @@ -2176,14 +2302,14 @@ init_@lapack_func@(GEEV_PARAMS_t* params, vr = vl + vl_size; rwork = vr + vr_size; - params->A = a; - params->WR = rwork; + params->A = (ftyp*)a; + params->WR = (realtyp*)rwork; params->WI = NULL; params->VLR = NULL; params->VRR = NULL; - params->VL = vl; - params->VR = vr; - params->W = w; + params->VL = (ftyp*)vl; + params->VR = (ftyp*)vr; + params->W = (ftyp*)w; params->N = n; params->LDA = ld; params->LDVL = ld; @@ -2193,21 +2319,21 @@ init_@lapack_func@(GEEV_PARAMS_t* params, /* Work size query */ { - @typ@ work_size_query; + ftyp work_size_query; params->LWORK = -1; params->WORK = &work_size_query; - if (call_@lapack_func@(params) != 0) { + if (call_geev(params) != 0) { goto error; } - work_count = (size_t) work_size_query.array[0]; + work_count = (size_t) work_size_query.r; /* Fix a bug in lapack 3.0.0 */ if(work_count == 0) work_count = 1; } - mem_buff2 = malloc(work_count*sizeof(@ftyp@)); + mem_buff2 = (npy_uint8 *)malloc(work_count*sizeof(ftyp)); if (!mem_buff2) { goto error; } @@ -2215,7 +2341,7 @@ init_@lapack_func@(GEEV_PARAMS_t* params, work = mem_buff2; params->LWORK = (fortran_int)work_count; - params->WORK = work; + params->WORK = (ftyp*)work; return 1; error: @@ -2226,32 +2352,27 @@ init_@lapack_func@(GEEV_PARAMS_t* params, return 0; } - -static NPY_INLINE void -process_@lapack_func@_results(GEEV_PARAMS_t *NPY_UNUSED(params)) +template<typename complextyp, typename typ> +static inline void +process_geev_results(GEEV_PARAMS_t<typ> *NPY_UNUSED(params), complex_trait) { /* nothing to do here, complex versions are ready to copy out */ } -/**end repeat**/ -/**begin repeat - #TYPE = FLOAT, DOUBLE, CDOUBLE# - #COMPLEXTYPE = CFLOAT, CDOUBLE, CDOUBLE# - #ftype = fortran_real, fortran_doublereal, fortran_doublecomplex# - #lapack_func = sgeev, dgeev, zgeev# - */ -static NPY_INLINE void -release_@lapack_func@(GEEV_PARAMS_t *params) +template<typename typ> +static inline void +release_geev(GEEV_PARAMS_t<typ> *params) { free(params->WORK); free(params->A); memset(params, 0, sizeof(*params)); } -static NPY_INLINE void -@TYPE@_eig_wrapper(char JOBVL, +template<typename fctype, typename ftype> +static inline void +eig_wrapper(char JOBVL, char JOBVR, char**args, npy_intp const *dimensions, @@ -2262,7 +2383,7 @@ static NPY_INLINE void size_t outer_dim = *dimensions++; size_t op_count = 2; int error_occurred = get_fp_invalid_and_clear(); - GEEV_PARAMS_t geev_params; + GEEV_PARAMS_t<ftype> geev_params; assert(JOBVL == 'N'); @@ -2275,9 +2396,9 @@ static NPY_INLINE void } steps += op_count; - if (init_@lapack_func@(&geev_params, + if (init_geev(&geev_params, JOBVL, JOBVR, - (fortran_int)dimensions[0])) { + (fortran_int)dimensions[0], dispatch_scalar<ftype>())) { LINEARIZE_DATA_t a_in; LINEARIZE_DATA_t w_out; LINEARIZE_DATA_t vl_out; @@ -2307,78 +2428,81 @@ static NPY_INLINE void int not_ok; char **arg_iter = args; /* copy the matrix in */ - linearize_@TYPE@_matrix(geev_params.A, *arg_iter++, &a_in); - not_ok = call_@lapack_func@(&geev_params); + linearize_matrix((ftype*)geev_params.A, (ftype*)*arg_iter++, &a_in); + not_ok = call_geev(&geev_params); if (!not_ok) { - process_@lapack_func@_results(&geev_params); - delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, - geev_params.W, + process_geev_results<fctype>(&geev_params, +dispatch_scalar<ftype>{}); + delinearize_matrix((fctype*)*arg_iter++, + (fctype*)geev_params.W, &w_out); if ('V' == geev_params.JOBVL) { - delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, - geev_params.VL, + delinearize_matrix((fctype*)*arg_iter++, + (fctype*)geev_params.VL, &vl_out); } if ('V' == geev_params.JOBVR) { - delinearize_@COMPLEXTYPE@_matrix(*arg_iter++, - geev_params.VR, + delinearize_matrix((fctype*)*arg_iter++, + (fctype*)geev_params.VR, &vr_out); } } else { /* geev failed */ error_occurred = 1; - nan_@COMPLEXTYPE@_matrix(*arg_iter++, &w_out); + nan_matrix((fctype*)*arg_iter++, &w_out); if ('V' == geev_params.JOBVL) { - nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vl_out); + nan_matrix((fctype*)*arg_iter++, &vl_out); } if ('V' == geev_params.JOBVR) { - nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vr_out); + nan_matrix((fctype*)*arg_iter++, &vr_out); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } - release_@lapack_func@(&geev_params); + release_geev(&geev_params); } set_fp_invalid_or_clear(error_occurred); } +template<typename fctype, typename ftype> static void -@TYPE@_eig(char **args, +eig(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - @TYPE@_eig_wrapper('N', 'V', args, dimensions, steps); + eig_wrapper<fctype, ftype>('N', 'V', args, dimensions, steps); } +template<typename fctype, typename ftype> static void -@TYPE@_eigvals(char **args, +eigvals(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - @TYPE@_eig_wrapper('N', 'N', args, dimensions, steps); + eig_wrapper<fctype, ftype>('N', 'N', args, dimensions, steps); } -/**end repeat**/ /* -------------------------------------------------------------------------- */ /* singular value decomposition */ -typedef struct gessd_params_struct +template<typename ftyp> +struct GESDD_PARAMS_t { - void *A; - void *S; - void *U; - void *VT; - void *WORK; - void *RWORK; - void *IWORK; + ftyp *A; + basetype_t<ftyp> *S; + ftyp *U; + ftyp *VT; + ftyp *WORK; + basetype_t<ftyp> *RWORK; + fortran_int *IWORK; fortran_int M; fortran_int N; @@ -2387,12 +2511,13 @@ typedef struct gessd_params_struct fortran_int LDVT; fortran_int LWORK; char JOBZ; -} GESDD_PARAMS_t; +} ; -static NPY_INLINE void +template<typename ftyp> +static inline void dump_gesdd_params(const char *name, - GESDD_PARAMS_t *params) + GESDD_PARAMS_t<ftyp> *params) { TRACE_TXT("\n%s:\n"\ @@ -2433,7 +2558,7 @@ dump_gesdd_params(const char *name, "JOBZ", ' ', params->JOBZ); } -static NPY_INLINE int +static inline int compute_urows_vtcolumns(char jobz, fortran_int m, fortran_int n, fortran_int *urows, fortran_int *vtcolumns) @@ -2462,43 +2587,51 @@ compute_urows_vtcolumns(char jobz, return 1; } - -/**begin repeat - #TYPE = FLOAT, DOUBLE# - #lapack_func = sgesdd, dgesdd# - #ftyp = fortran_real, fortran_doublereal# - */ - -static NPY_INLINE fortran_int -call_@lapack_func@(GESDD_PARAMS_t *params) +static inline fortran_int +call_gesdd(GESDD_PARAMS_t<fortran_real> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, + LAPACK(sgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, params->A, ¶ms->LDA, params->S, params->U, ¶ms->LDU, params->VT, ¶ms->LDVT, params->WORK, ¶ms->LWORK, - params->IWORK, + (fortran_int*)params->IWORK, + &rv); + return rv; +} +static inline fortran_int +call_gesdd(GESDD_PARAMS_t<fortran_doublereal> *params) +{ + fortran_int rv; + LAPACK(dgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, + params->A, ¶ms->LDA, + params->S, + params->U, ¶ms->LDU, + params->VT, ¶ms->LDVT, + params->WORK, ¶ms->LWORK, + (fortran_int*)params->IWORK, &rv); return rv; } -static NPY_INLINE int -init_@lapack_func@(GESDD_PARAMS_t *params, +template<typename ftyp> +static inline int +init_gesdd(GESDD_PARAMS_t<ftyp> *params, char jobz, fortran_int m, - fortran_int n) + fortran_int n, scalar_trait) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *s, *u, *vt, *work, *iwork; size_t safe_m = m; size_t safe_n = n; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); + size_t a_size = safe_m * safe_n * sizeof(ftyp); fortran_int min_m_n = fortran_int_min(m, n); size_t safe_min_m_n = min_m_n; - size_t s_size = safe_min_m_n * sizeof(@ftyp@); + size_t s_size = safe_min_m_n * sizeof(ftyp); fortran_int u_row_count, vt_column_count; size_t safe_u_row_count, safe_vt_column_count; size_t u_size, vt_size; @@ -2514,10 +2647,10 @@ init_@lapack_func@(GESDD_PARAMS_t *params, safe_u_row_count = u_row_count; safe_vt_column_count = vt_column_count; - u_size = safe_u_row_count * safe_m * sizeof(@ftyp@); - vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@); + u_size = safe_u_row_count * safe_m * sizeof(ftyp); + vt_size = safe_n * safe_vt_column_count * sizeof(ftyp); - mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size); + mem_buff = (npy_uint8 *)malloc(a_size + s_size + u_size + vt_size + iwork_size); if (!mem_buff) { goto error; @@ -2534,12 +2667,12 @@ init_@lapack_func@(GESDD_PARAMS_t *params, params->M = m; params->N = n; - params->A = a; - params->S = s; - params->U = u; - params->VT = vt; + params->A = (ftyp*)a; + params->S = (ftyp*)s; + params->U = (ftyp*)u; + params->VT = (ftyp*)vt; params->RWORK = NULL; - params->IWORK = iwork; + params->IWORK = (fortran_int*)iwork; params->LDA = ld; params->LDU = ld; params->LDVT = vt_column_count; @@ -2547,22 +2680,22 @@ init_@lapack_func@(GESDD_PARAMS_t *params, /* Work size query */ { - @ftyp@ work_size_query; + ftyp work_size_query; params->LWORK = -1; params->WORK = &work_size_query; - if (call_@lapack_func@(params) != 0) { + if (call_gesdd(params) != 0) { goto error; } work_count = (fortran_int)work_size_query; /* Fix a bug in lapack 3.0.0 */ if(work_count == 0) work_count = 1; - work_size = (size_t)work_count * sizeof(@ftyp@); + work_size = (size_t)work_count * sizeof(ftyp); } - mem_buff2 = malloc(work_size); + mem_buff2 = (npy_uint8 *)malloc(work_size); if (!mem_buff2) { goto error; } @@ -2570,7 +2703,7 @@ init_@lapack_func@(GESDD_PARAMS_t *params, work = mem_buff2; params->LWORK = work_count; - params->WORK = work; + params->WORK = (ftyp*)work; return 1; error: @@ -2582,21 +2715,26 @@ init_@lapack_func@(GESDD_PARAMS_t *params, return 0; } -/**end repeat**/ - -/**begin repeat - #TYPE = CFLOAT, CDOUBLE# - #ftyp = fortran_complex, fortran_doublecomplex# - #frealtyp = fortran_real, fortran_doublereal# - #typ = COMPLEX_t, DOUBLECOMPLEX_t# - #lapack_func = cgesdd, zgesdd# - */ - -static NPY_INLINE fortran_int -call_@lapack_func@(GESDD_PARAMS_t *params) +static inline fortran_int +call_gesdd(GESDD_PARAMS_t<fortran_complex> *params) +{ + fortran_int rv; + LAPACK(cgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, + params->A, ¶ms->LDA, + params->S, + params->U, ¶ms->LDU, + params->VT, ¶ms->LDVT, + params->WORK, ¶ms->LWORK, + params->RWORK, + params->IWORK, + &rv); + return rv; +} +static inline fortran_int +call_gesdd(GESDD_PARAMS_t<fortran_doublecomplex> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->JOBZ, ¶ms->M, ¶ms->N, + LAPACK(zgesdd)(¶ms->JOBZ, ¶ms->M, ¶ms->N, params->A, ¶ms->LDA, params->S, params->U, ¶ms->LDU, @@ -2608,12 +2746,14 @@ call_@lapack_func@(GESDD_PARAMS_t *params) return rv; } -static NPY_INLINE int -init_@lapack_func@(GESDD_PARAMS_t *params, +template<typename ftyp> +static inline int +init_gesdd(GESDD_PARAMS_t<ftyp> *params, char jobz, fortran_int m, - fortran_int n) + fortran_int n, complex_trait) { +using frealtyp = basetype_t<ftyp>; npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL; npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork; size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size; @@ -2632,17 +2772,17 @@ init_@lapack_func@(GESDD_PARAMS_t *params, safe_u_row_count = u_row_count; safe_vt_column_count = vt_column_count; - a_size = safe_m * safe_n * sizeof(@ftyp@); - s_size = safe_min_m_n * sizeof(@frealtyp@); - u_size = safe_u_row_count * safe_m * sizeof(@ftyp@); - vt_size = safe_n * safe_vt_column_count * sizeof(@ftyp@); + a_size = safe_m * safe_n * sizeof(ftyp); + s_size = safe_min_m_n * sizeof(frealtyp); + u_size = safe_u_row_count * safe_m * sizeof(ftyp); + vt_size = safe_n * safe_vt_column_count * sizeof(ftyp); rwork_size = 'N'==jobz? (7 * safe_min_m_n) : (5*safe_min_m_n * safe_min_m_n + 5*safe_min_m_n); - rwork_size *= sizeof(@ftyp@); + rwork_size *= sizeof(ftyp); iwork_size = 8 * safe_min_m_n* sizeof(fortran_int); - mem_buff = malloc(a_size + + mem_buff = (npy_uint8 *)malloc(a_size + s_size + u_size + vt_size + @@ -2662,12 +2802,12 @@ init_@lapack_func@(GESDD_PARAMS_t *params, /* fix vt_column_count so that it is a valid lapack parameter (0 is not) */ vt_column_count = fortran_int_max(1, vt_column_count); - params->A = a; - params->S = s; - params->U = u; - params->VT = vt; - params->RWORK = rwork; - params->IWORK = iwork; + params->A = (ftyp*)a; + params->S = (frealtyp*)s; + params->U = (ftyp*)u; + params->VT = (ftyp*)vt; + params->RWORK = (frealtyp*)rwork; + params->IWORK = (fortran_int*)iwork; params->M = m; params->N = n; params->LDA = ld; @@ -2677,22 +2817,22 @@ init_@lapack_func@(GESDD_PARAMS_t *params, /* Work size query */ { - @ftyp@ work_size_query; + ftyp work_size_query; params->LWORK = -1; params->WORK = &work_size_query; - if (call_@lapack_func@(params) != 0) { + if (call_gesdd(params) != 0) { goto error; } - work_count = (fortran_int)((@typ@*)&work_size_query)->array[0]; + work_count = (fortran_int)(*(frealtyp*)&work_size_query); /* Fix a bug in lapack 3.0.0 */ if(work_count == 0) work_count = 1; - work_size = (size_t)work_count * sizeof(@ftyp@); + work_size = (size_t)work_count * sizeof(ftyp); } - mem_buff2 = malloc(work_size); + mem_buff2 = (npy_uint8 *)malloc(work_size); if (!mem_buff2) { goto error; } @@ -2700,7 +2840,7 @@ init_@lapack_func@(GESDD_PARAMS_t *params, work = mem_buff2; params->LWORK = work_count; - params->WORK = work; + params->WORK = (ftyp*)work; return 1; error: @@ -2711,16 +2851,10 @@ init_@lapack_func@(GESDD_PARAMS_t *params, return 0; } -/**end repeat**/ - -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - #REALTYPE = FLOAT, DOUBLE, FLOAT, DOUBLE# - #lapack_func = sgesdd, dgesdd, cgesdd, zgesdd# - */ -static NPY_INLINE void -release_@lapack_func@(GESDD_PARAMS_t* params) +template<typename typ> +static inline void +release_gesdd(GESDD_PARAMS_t<typ>* params) { /* A and WORK contain allocated blocks */ free(params->A); @@ -2728,28 +2862,31 @@ release_@lapack_func@(GESDD_PARAMS_t* params) memset(params, 0, sizeof(*params)); } -static NPY_INLINE void -@TYPE@_svd_wrapper(char JOBZ, +template<typename typ> +static inline void +svd_wrapper(char JOBZ, char **args, npy_intp const *dimensions, npy_intp const *steps) { +using basetyp = basetype_t<typ>; ptrdiff_t outer_steps[4]; int error_occurred = get_fp_invalid_and_clear(); size_t iter; size_t outer_dim = *dimensions++; size_t op_count = (JOBZ=='N')?2:4; - GESDD_PARAMS_t params; + GESDD_PARAMS_t<typ> params; for (iter = 0; iter < op_count; ++iter) { outer_steps[iter] = (ptrdiff_t) steps[iter]; } steps += op_count; - if (init_@lapack_func@(¶ms, - JOBZ, - (fortran_int)dimensions[0], - (fortran_int)dimensions[1])) { + if (init_gesdd(¶ms, + JOBZ, + (fortran_int)dimensions[0], + (fortran_int)dimensions[1], +dispatch_scalar<typ>())) { LINEARIZE_DATA_t a_in, u_out, s_out, v_out; fortran_int min_m_n = params.M < params.N ? params.M : params.N; @@ -2780,97 +2917,95 @@ static NPY_INLINE void for (iter = 0; iter < outer_dim; ++iter) { int not_ok; /* copy the matrix in */ - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - not_ok = call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + not_ok = call_gesdd(¶ms); if (!not_ok) { if ('N' == params.JOBZ) { - delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out); + delinearize_matrix((basetyp*)args[1], (basetyp*)params.S, &s_out); } else { if ('A' == params.JOBZ && min_m_n == 0) { /* Lapack has betrayed us and left these uninitialized, * so produce an identity matrix for whichever of u * and v is not empty. */ - identity_@TYPE@_matrix(params.U, params.M); - identity_@TYPE@_matrix(params.VT, params.N); + identity_matrix((typ*)params.U, params.M); + identity_matrix((typ*)params.VT, params.N); } - delinearize_@TYPE@_matrix(args[1], params.U, &u_out); - delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out); - delinearize_@TYPE@_matrix(args[3], params.VT, &v_out); + delinearize_matrix((typ*)args[1], (typ*)params.U, &u_out); + delinearize_matrix((basetyp*)args[2], (basetyp*)params.S, &s_out); + delinearize_matrix((typ*)args[3], (typ*)params.VT, &v_out); } } else { error_occurred = 1; if ('N' == params.JOBZ) { - nan_@REALTYPE@_matrix(args[1], &s_out); + nan_matrix((basetyp*)args[1], &s_out); } else { - nan_@TYPE@_matrix(args[1], &u_out); - nan_@REALTYPE@_matrix(args[2], &s_out); - nan_@TYPE@_matrix(args[3], &v_out); + nan_matrix((typ*)args[1], &u_out); + nan_matrix((basetyp*)args[2], &s_out); + nan_matrix((typ*)args[3], &v_out); } } update_pointers((npy_uint8**)args, outer_steps, op_count); } - release_@lapack_func@(¶ms); + release_gesdd(¶ms); } set_fp_invalid_or_clear(error_occurred); } -/**end repeat*/ -/* svd gufunc entry points */ -/**begin repeat - #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# - */ +template<typename typ> static void -@TYPE@_svd_N(char **args, +svd_N(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - @TYPE@_svd_wrapper('N', args, dimensions, steps); + svd_wrapper<fortran_type_t<typ>>('N', args, dimensions, steps); } +template<typename typ> static void -@TYPE@_svd_S(char **args, +svd_S(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - @TYPE@_svd_wrapper('S', args, dimensions, steps); + svd_wrapper<fortran_type_t<typ>>('S', args, dimensions, steps); } +template<typename typ> static void -@TYPE@_svd_A(char **args, +svd_A(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - @TYPE@_svd_wrapper('A', args, dimensions, steps); + svd_wrapper<fortran_type_t<typ>>('A', args, dimensions, steps); } -/**end repeat**/ - /* -------------------------------------------------------------------------- */ /* qr (modes - r, raw) */ -typedef struct geqfr_params_struct +template<typename typ> +struct GEQRF_PARAMS_t { fortran_int M; fortran_int N; - void *A; + typ *A; fortran_int LDA; - void* TAU; - void *WORK; + typ* TAU; + typ *WORK; fortran_int LWORK; -} GEQRF_PARAMS_t; +}; +template<typename typ> static inline void dump_geqrf_params(const char *name, - GEQRF_PARAMS_t *params) + GEQRF_PARAMS_t<typ> *params) { TRACE_TXT("\n%s:\n"\ @@ -2894,15 +3029,22 @@ dump_geqrf_params(const char *name, "LWORK", (int)params->LWORK); } -/**begin repeat - #lapack_func=dgeqrf,zgeqrf# - */ - static inline fortran_int -call_@lapack_func@(GEQRF_PARAMS_t *params) +call_geqrf(GEQRF_PARAMS_t<double> *params) +{ + fortran_int rv; + LAPACK(dgeqrf)(¶ms->M, ¶ms->N, + params->A, ¶ms->LDA, + params->TAU, + params->WORK, ¶ms->LWORK, + &rv); + return rv; +} +static inline fortran_int +call_geqrf(GEQRF_PARAMS_t<f2c_doublecomplex> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, + LAPACK(zgeqrf)(¶ms->M, ¶ms->N, params->A, ¶ms->LDA, params->TAU, params->WORK, ¶ms->LWORK, @@ -2910,18 +3052,13 @@ call_@lapack_func@(GEQRF_PARAMS_t *params) return rv; } -/**end repeat**/ -/**begin repeat - #TYPE=DOUBLE# - #lapack_func=dgeqrf# - #ftyp=fortran_doublereal# - */ static inline int -init_@lapack_func@(GEQRF_PARAMS_t *params, +init_geqrf(GEQRF_PARAMS_t<fortran_doublereal> *params, fortran_int m, fortran_int n) { +using ftyp = fortran_doublereal; npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *tau, *work; @@ -2930,14 +3067,14 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, size_t safe_m = m; size_t safe_n = n; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t tau_size = safe_min_m_n * sizeof(@ftyp@); + size_t a_size = safe_m * safe_n * sizeof(ftyp); + size_t tau_size = safe_min_m_n * sizeof(ftyp); fortran_int work_count; size_t work_size; fortran_int lda = fortran_int_max(1, m); - mem_buff = malloc(a_size + tau_size); + mem_buff = (npy_uint8 *)malloc(a_size + tau_size); if (!mem_buff) goto error; @@ -2949,35 +3086,35 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, params->M = m; params->N = n; - params->A = a; - params->TAU = tau; + params->A = (ftyp*)a; + params->TAU = (ftyp*)tau; params->LDA = lda; { /* compute optimal work size */ - @ftyp@ work_size_query; + ftyp work_size_query; params->WORK = &work_size_query; params->LWORK = -1; - if (call_@lapack_func@(params) != 0) + if (call_geqrf(params) != 0) goto error; - work_count = (fortran_int) *(@ftyp@*) params->WORK; + work_count = (fortran_int) *(ftyp*) params->WORK; } params->LWORK = fortran_int_max(fortran_int_max(1, n), work_count); - work_size = (size_t) params->LWORK * sizeof(@ftyp@); - mem_buff2 = malloc(work_size); + work_size = (size_t) params->LWORK * sizeof(ftyp); + mem_buff2 = (npy_uint8 *)malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; - params->WORK = work; + params->WORK = (ftyp*)work; return 1; error: @@ -2989,18 +3126,13 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, return 0; } -/**end repeat**/ -/**begin repeat - #TYPE=CDOUBLE# - #lapack_func=zgeqrf# - #ftyp=fortran_doublecomplex# - */ static inline int -init_@lapack_func@(GEQRF_PARAMS_t *params, +init_geqrf(GEQRF_PARAMS_t<fortran_doublecomplex> *params, fortran_int m, fortran_int n) { +using ftyp = fortran_doublecomplex; npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *tau, *work; @@ -3009,14 +3141,14 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, size_t safe_m = m; size_t safe_n = n; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t tau_size = safe_min_m_n * sizeof(@ftyp@); + size_t a_size = safe_m * safe_n * sizeof(ftyp); + size_t tau_size = safe_min_m_n * sizeof(ftyp); fortran_int work_count; size_t work_size; fortran_int lda = fortran_int_max(1, m); - mem_buff = malloc(a_size + tau_size); + mem_buff = (npy_uint8 *)malloc(a_size + tau_size); if (!mem_buff) goto error; @@ -3028,37 +3160,37 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, params->M = m; params->N = n; - params->A = a; - params->TAU = tau; + params->A = (ftyp*)a; + params->TAU = (ftyp*)tau; params->LDA = lda; { /* compute optimal work size */ - @ftyp@ work_size_query; + ftyp work_size_query; params->WORK = &work_size_query; params->LWORK = -1; - if (call_@lapack_func@(params) != 0) + if (call_geqrf(params) != 0) goto error; - work_count = (fortran_int) ((@ftyp@*)params->WORK)->r; + work_count = (fortran_int) ((ftyp*)params->WORK)->r; } params->LWORK = fortran_int_max(fortran_int_max(1, n), work_count); - work_size = (size_t) params->LWORK * sizeof(@ftyp@); + work_size = (size_t) params->LWORK * sizeof(ftyp); - mem_buff2 = malloc(work_size); + mem_buff2 = (npy_uint8 *)malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; - params->WORK = work; + params->WORK = (ftyp*)work; return 1; error: @@ -3070,13 +3202,10 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, return 0; } -/**end repeat**/ -/**begin repeat - #lapack_func=dgeqrf,zgeqrf# - */ +template<typename ftyp> static inline void -release_@lapack_func@(GEQRF_PARAMS_t* params) +release_geqrf(GEQRF_PARAMS_t<ftyp>* params) { /* A and WORK contain allocated blocks */ free(params->A); @@ -3084,22 +3213,14 @@ release_@lapack_func@(GEQRF_PARAMS_t* params) memset(params, 0, sizeof(*params)); } -/**end repeat**/ - -/**begin repeat - #TYPE=DOUBLE,CDOUBLE# - #REALTYPE=DOUBLE,DOUBLE# - #lapack_func=dgeqrf,zgeqrf# - #typ = npy_double,npy_cdouble# - #basetyp = npy_double,npy_double# - #ftyp = fortran_doublereal,fortran_doublecomplex# - #cmplx = 0, 1# - */ +template<typename typ> static void -@TYPE@_qr_r_raw(char **args, npy_intp const *dimensions, npy_intp const *steps, +qr_r_raw(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - GEQRF_PARAMS_t params; +using ftyp = fortran_type_t<typ>; + + GEQRF_PARAMS_t<ftyp> params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n, m; @@ -3108,7 +3229,7 @@ static void m = (fortran_int)dimensions[0]; n = (fortran_int)dimensions[1]; - if (init_@lapack_func@(¶ms, m, n)) { + if (init_geqrf(¶ms, m, n)) { LINEARIZE_DATA_t a_in, tau_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); @@ -3116,50 +3237,57 @@ static void BEGIN_OUTER_LOOP_2 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - not_ok = call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + not_ok = call_geqrf(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[0], params.A, &a_in); - delinearize_@TYPE@_matrix(args[1], params.TAU, &tau_out); + delinearize_matrix((typ*)args[0], (typ*)params.A, &a_in); + delinearize_matrix((typ*)args[1], (typ*)params.TAU, &tau_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[1], &tau_out); + nan_matrix((typ*)args[1], &tau_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_geqrf(¶ms); } set_fp_invalid_or_clear(error_occurred); } -/**end repeat**/ /* -------------------------------------------------------------------------- */ /* qr common code (modes - reduced and complete) */ -typedef struct gqr_params_struct +template<typename typ> +struct GQR_PARAMS_t { fortran_int M; fortran_int MC; fortran_int MN; void* A; - void *Q; + typ *Q; fortran_int LDA; - void* TAU; - void *WORK; + typ* TAU; + typ *WORK; fortran_int LWORK; -} GQR_PARAMS_t; - -/**begin repeat - #lapack_func=dorgqr,zungqr# - */ +} ; static inline fortran_int -call_@lapack_func@(GQR_PARAMS_t *params) +call_gqr(GQR_PARAMS_t<double> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->MC, ¶ms->MN, + LAPACK(dorgqr)(¶ms->M, ¶ms->MC, ¶ms->MN, + params->Q, ¶ms->LDA, + params->TAU, + params->WORK, ¶ms->LWORK, + &rv); + return rv; +} +static inline fortran_int +call_gqr(GQR_PARAMS_t<f2c_doublecomplex> *params) +{ + fortran_int rv; + LAPACK(zungqr)(¶ms->M, ¶ms->MC, ¶ms->MN, params->Q, ¶ms->LDA, params->TAU, params->WORK, ¶ms->LWORK, @@ -3167,18 +3295,13 @@ call_@lapack_func@(GQR_PARAMS_t *params) return rv; } -/**end repeat**/ - -/**begin repeat - #lapack_func=dorgqr# - #ftyp=fortran_doublereal# - */ static inline int -init_@lapack_func@_common(GQR_PARAMS_t *params, +init_gqr_common(GQR_PARAMS_t<fortran_doublereal> *params, fortran_int m, fortran_int n, fortran_int mc) { +using ftyp = fortran_doublereal; npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *q, *tau, *work; @@ -3187,15 +3310,15 @@ init_@lapack_func@_common(GQR_PARAMS_t *params, size_t safe_min_m_n = min_m_n; size_t safe_m = m; size_t safe_n = n; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t q_size = safe_m * safe_mc * sizeof(@ftyp@); - size_t tau_size = safe_min_m_n * sizeof(@ftyp@); + size_t a_size = safe_m * safe_n * sizeof(ftyp); + size_t q_size = safe_m * safe_mc * sizeof(ftyp); + size_t tau_size = safe_min_m_n * sizeof(ftyp); fortran_int work_count; size_t work_size; fortran_int lda = fortran_int_max(1, m); - mem_buff = malloc(q_size + tau_size + a_size); + mem_buff = (npy_uint8 *)malloc(q_size + tau_size + a_size); if (!mem_buff) goto error; @@ -3209,35 +3332,35 @@ init_@lapack_func@_common(GQR_PARAMS_t *params, params->MC = mc; params->MN = min_m_n; params->A = a; - params->Q = q; - params->TAU = tau; + params->Q = (ftyp*)q; + params->TAU = (ftyp*)tau; params->LDA = lda; { /* compute optimal work size */ - @ftyp@ work_size_query; + ftyp work_size_query; params->WORK = &work_size_query; params->LWORK = -1; - if (call_@lapack_func@(params) != 0) + if (call_gqr(params) != 0) goto error; - work_count = (fortran_int) *(@ftyp@*) params->WORK; + work_count = (fortran_int) *(ftyp*) params->WORK; } params->LWORK = fortran_int_max(fortran_int_max(1, n), work_count); - work_size = (size_t) params->LWORK * sizeof(@ftyp@); + work_size = (size_t) params->LWORK * sizeof(ftyp); - mem_buff2 = malloc(work_size); + mem_buff2 = (npy_uint8 *)malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; - params->WORK = work; + params->WORK = (ftyp*)work; return 1; error: @@ -3249,18 +3372,14 @@ init_@lapack_func@_common(GQR_PARAMS_t *params, return 0; } -/**end repeat**/ -/**begin repeat - #lapack_func=zungqr# - #ftyp=fortran_doublecomplex# - */ static inline int -init_@lapack_func@_common(GQR_PARAMS_t *params, +init_gqr_common(GQR_PARAMS_t<fortran_doublecomplex> *params, fortran_int m, fortran_int n, fortran_int mc) { +using ftyp=fortran_doublecomplex; npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *q, *tau, *work; @@ -3270,15 +3389,15 @@ init_@lapack_func@_common(GQR_PARAMS_t *params, size_t safe_m = m; size_t safe_n = n; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t q_size = safe_m * safe_mc * sizeof(@ftyp@); - size_t tau_size = safe_min_m_n * sizeof(@ftyp@); + size_t a_size = safe_m * safe_n * sizeof(ftyp); + size_t q_size = safe_m * safe_mc * sizeof(ftyp); + size_t tau_size = safe_min_m_n * sizeof(ftyp); fortran_int work_count; size_t work_size; fortran_int lda = fortran_int_max(1, m); - mem_buff = malloc(q_size + tau_size + a_size); + mem_buff = (npy_uint8 *)malloc(q_size + tau_size + a_size); if (!mem_buff) goto error; @@ -3292,36 +3411,36 @@ init_@lapack_func@_common(GQR_PARAMS_t *params, params->MC = mc; params->MN = min_m_n; params->A = a; - params->Q = q; - params->TAU = tau; + params->Q = (ftyp*)q; + params->TAU = (ftyp*)tau; params->LDA = lda; { /* compute optimal work size */ - @ftyp@ work_size_query; + ftyp work_size_query; params->WORK = &work_size_query; params->LWORK = -1; - if (call_@lapack_func@(params) != 0) + if (call_gqr(params) != 0) goto error; - work_count = (fortran_int) ((@ftyp@*)params->WORK)->r; + work_count = (fortran_int) ((ftyp*)params->WORK)->r; } params->LWORK = fortran_int_max(fortran_int_max(1, n), work_count); - work_size = (size_t) params->LWORK * sizeof(@ftyp@); + work_size = (size_t) params->LWORK * sizeof(ftyp); - mem_buff2 = malloc(work_size); + mem_buff2 = (npy_uint8 *)malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; - params->WORK = work; + params->WORK = (ftyp*)work; params->LWORK = work_count; return 1; @@ -3334,15 +3453,14 @@ init_@lapack_func@_common(GQR_PARAMS_t *params, return 0; } -/**end repeat**/ - /* -------------------------------------------------------------------------- */ /* qr (modes - reduced) */ +template<typename typ> static inline void dump_gqr_params(const char *name, - GQR_PARAMS_t *params) + GQR_PARAMS_t<typ> *params) { TRACE_TXT("\n%s:\n"\ @@ -3368,27 +3486,21 @@ dump_gqr_params(const char *name, "LWORK", (int)params->LWORK); } -/**begin repeat - #lapack_func=dorgqr,zungqr# - #ftyp=fortran_doublereal,fortran_doublecomplex# - */ +template<typename ftyp> static inline int -init_@lapack_func@(GQR_PARAMS_t *params, +init_gqr(GQR_PARAMS_t<ftyp> *params, fortran_int m, fortran_int n) { - return init_@lapack_func@_common( + return init_gqr_common( params, m, n, fortran_int_min(m, n)); } -/**end repeat**/ -/**begin repeat - #lapack_func=dorgqr,zungqr# - */ +template<typename typ> static inline void -release_@lapack_func@(GQR_PARAMS_t* params) +release_gqr(GQR_PARAMS_t<typ>* params) { /* A and WORK contain allocated blocks */ free(params->Q); @@ -3396,22 +3508,13 @@ release_@lapack_func@(GQR_PARAMS_t* params) memset(params, 0, sizeof(*params)); } -/**end repeat**/ - -/**begin repeat - #TYPE=DOUBLE,CDOUBLE# - #REALTYPE=DOUBLE,DOUBLE# - #lapack_func=dorgqr,zungqr# - #typ = npy_double, npy_cdouble# - #basetyp = npy_double, npy_double# - #ftyp = fortran_doublereal,fortran_doublecomplex# - #cmplx = 0, 1# - */ +template<typename typ> static void -@TYPE@_qr_reduced(char **args, npy_intp const *dimensions, npy_intp const *steps, +qr_reduced(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - GQR_PARAMS_t params; +using ftyp = fortran_type_t<typ>; + GQR_PARAMS_t<ftyp> params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n, m; @@ -3420,7 +3523,7 @@ static void m = (fortran_int)dimensions[0]; n = (fortran_int)dimensions[1]; - if (init_@lapack_func@(¶ms, m, n)) { + if (init_gqr(¶ms, m, n)) { LINEARIZE_DATA_t a_in, tau_in, q_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); @@ -3429,57 +3532,44 @@ static void BEGIN_OUTER_LOOP_3 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - linearize_@TYPE@_matrix(params.Q, args[0], &a_in); - linearize_@TYPE@_matrix(params.TAU, args[1], &tau_in); - not_ok = call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + linearize_matrix((typ*)params.Q, (typ*)args[0], &a_in); + linearize_matrix((typ*)params.TAU, (typ*)args[1], &tau_in); + not_ok = call_gqr(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[2], params.Q, &q_out); + delinearize_matrix((typ*)args[2], (typ*)params.Q, &q_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[2], &q_out); + nan_matrix((typ*)args[2], &q_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_gqr(¶ms); } set_fp_invalid_or_clear(error_occurred); } -/**end repeat**/ - /* -------------------------------------------------------------------------- */ /* qr (modes - complete) */ -/**begin repeat - #lapack_func=dorgqr,zungqr# - #ftyp=fortran_doublereal,fortran_doublecomplex# - */ +template<typename ftyp> static inline int -init_@lapack_func@_complete(GQR_PARAMS_t *params, +init_gqr_complete(GQR_PARAMS_t<ftyp> *params, fortran_int m, fortran_int n) { - return init_@lapack_func@_common(params, m, n, m); + return init_gqr_common(params, m, n, m); } -/**end repeat**/ -/**begin repeat - #TYPE=DOUBLE,CDOUBLE# - #REALTYPE=DOUBLE,DOUBLE# - #lapack_func=dorgqr,zungqr# - #typ = npy_double,npy_cdouble# - #basetyp = npy_double,npy_double# - #ftyp = fortran_doublereal,fortran_doublecomplex# - #cmplx = 0, 1# - */ +template<typename typ> static void -@TYPE@_qr_complete(char **args, npy_intp const *dimensions, npy_intp const *steps, +qr_complete(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - GQR_PARAMS_t params; +using ftyp = fortran_type_t<typ>; + GQR_PARAMS_t<ftyp> params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n, m; @@ -3489,7 +3579,7 @@ static void n = (fortran_int)dimensions[1]; - if (init_@lapack_func@_complete(¶ms, m, n)) { + if (init_gqr_complete(¶ms, m, n)) { LINEARIZE_DATA_t a_in, tau_in, q_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); @@ -3498,51 +3588,50 @@ static void BEGIN_OUTER_LOOP_3 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - linearize_@TYPE@_matrix(params.Q, args[0], &a_in); - linearize_@TYPE@_matrix(params.TAU, args[1], &tau_in); - not_ok = call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + linearize_matrix((typ*)params.Q, (typ*)args[0], &a_in); + linearize_matrix((typ*)params.TAU, (typ*)args[1], &tau_in); + not_ok = call_gqr(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[2], params.Q, &q_out); + delinearize_matrix((typ*)args[2], (typ*)params.Q, &q_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[2], &q_out); + nan_matrix((typ*)args[2], &q_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_gqr(¶ms); } set_fp_invalid_or_clear(error_occurred); } -/**end repeat**/ - /* -------------------------------------------------------------------------- */ /* least squares */ -typedef struct gelsd_params_struct +template<typename typ> +struct GELSD_PARAMS_t { fortran_int M; fortran_int N; fortran_int NRHS; - void *A; + typ *A; fortran_int LDA; - void *B; + typ *B; fortran_int LDB; - void *S; - void *RCOND; + basetype_t<typ> *S; + basetype_t<typ> *RCOND; fortran_int RANK; - void *WORK; + typ *WORK; fortran_int LWORK; - void *RWORK; - void *IWORK; -} GELSD_PARAMS_t; - + basetype_t<typ> *RWORK; + fortran_int *IWORK; +}; +template<typename typ> static inline void dump_gelsd_params(const char *name, - GELSD_PARAMS_t *params) + GELSD_PARAMS_t<typ> *params) { TRACE_TXT("\n%s:\n"\ @@ -3583,18 +3672,27 @@ dump_gelsd_params(const char *name, "RCOND", params->RCOND); } +static inline fortran_int +call_gelsd(GELSD_PARAMS_t<fortran_real> *params) +{ + fortran_int rv; + LAPACK(sgelsd)(¶ms->M, ¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->B, ¶ms->LDB, + params->S, + params->RCOND, ¶ms->RANK, + params->WORK, ¶ms->LWORK, + params->IWORK, + &rv); + return rv; +} -/**begin repeat - #TYPE=FLOAT,DOUBLE# - #lapack_func=sgelsd,dgelsd# - #ftyp=fortran_real,fortran_doublereal# - */ static inline fortran_int -call_@lapack_func@(GELSD_PARAMS_t *params) +call_gelsd(GELSD_PARAMS_t<fortran_doublereal> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, + LAPACK(dgelsd)(¶ms->M, ¶ms->N, ¶ms->NRHS, params->A, ¶ms->LDA, params->B, ¶ms->LDB, params->S, @@ -3605,11 +3703,14 @@ call_@lapack_func@(GELSD_PARAMS_t *params) return rv; } + +template<typename ftyp> static inline int -init_@lapack_func@(GELSD_PARAMS_t *params, +init_gelsd(GELSD_PARAMS_t<ftyp> *params, fortran_int m, fortran_int n, - fortran_int nrhs) + fortran_int nrhs, +scalar_trait) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; @@ -3622,9 +3723,9 @@ init_@lapack_func@(GELSD_PARAMS_t *params, size_t safe_n = n; size_t safe_nrhs = nrhs; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); - size_t s_size = safe_min_m_n * sizeof(@ftyp@); + size_t a_size = safe_m * safe_n * sizeof(ftyp); + size_t b_size = safe_max_m_n * safe_nrhs * sizeof(ftyp); + size_t s_size = safe_min_m_n * sizeof(ftyp); fortran_int work_count; size_t work_size; @@ -3632,7 +3733,7 @@ init_@lapack_func@(GELSD_PARAMS_t *params, fortran_int lda = fortran_int_max(1, m); fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); - mem_buff = malloc(a_size + b_size + s_size); + mem_buff = (npy_uint8 *)malloc(a_size + b_size + s_size); if (!mem_buff) goto error; @@ -3645,15 +3746,15 @@ init_@lapack_func@(GELSD_PARAMS_t *params, params->M = m; params->N = n; params->NRHS = nrhs; - params->A = a; - params->B = b; - params->S = s; + params->A = (ftyp*)a; + params->B = (ftyp*)b; + params->S = (ftyp*)s; params->LDA = lda; params->LDB = ldb; { /* compute optimal work size */ - @ftyp@ work_size_query; + ftyp work_size_query; fortran_int iwork_size_query; params->WORK = &work_size_query; @@ -3661,25 +3762,25 @@ init_@lapack_func@(GELSD_PARAMS_t *params, params->RWORK = NULL; params->LWORK = -1; - if (call_@lapack_func@(params) != 0) + if (call_gelsd(params) != 0) goto error; work_count = (fortran_int)work_size_query; - work_size = (size_t) work_size_query * sizeof(@ftyp@); + work_size = (size_t) work_size_query * sizeof(ftyp); iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); } - mem_buff2 = malloc(work_size + iwork_size); + mem_buff2 = (npy_uint8 *)malloc(work_size + iwork_size); if (!mem_buff2) goto error; work = mem_buff2; iwork = work + work_size; - params->WORK = work; + params->WORK = (ftyp*)work; params->RWORK = NULL; - params->IWORK = iwork; + params->IWORK = (fortran_int*)iwork; params->LWORK = work_count; return 1; @@ -3692,37 +3793,46 @@ init_@lapack_func@(GELSD_PARAMS_t *params, return 0; } -/**end repeat**/ - -/**begin repeat - #TYPE=CFLOAT,CDOUBLE# - #ftyp=fortran_complex,fortran_doublecomplex# - #frealtyp=fortran_real,fortran_doublereal# - #typ=COMPLEX_t,DOUBLECOMPLEX_t# - #lapack_func=cgelsd,zgelsd# - */ +static inline fortran_int +call_gelsd(GELSD_PARAMS_t<fortran_complex> *params) +{ + fortran_int rv; + LAPACK(cgelsd)(¶ms->M, ¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->B, ¶ms->LDB, + params->S, + params->RCOND, ¶ms->RANK, + params->WORK, ¶ms->LWORK, + params->RWORK, (fortran_int*)params->IWORK, + &rv); + return rv; +} static inline fortran_int -call_@lapack_func@(GELSD_PARAMS_t *params) +call_gelsd(GELSD_PARAMS_t<fortran_doublecomplex> *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, + LAPACK(zgelsd)(¶ms->M, ¶ms->N, ¶ms->NRHS, params->A, ¶ms->LDA, params->B, ¶ms->LDB, params->S, params->RCOND, ¶ms->RANK, params->WORK, ¶ms->LWORK, - params->RWORK, params->IWORK, + params->RWORK, (fortran_int*)params->IWORK, &rv); return rv; } + +template<typename ftyp> static inline int -init_@lapack_func@(GELSD_PARAMS_t *params, +init_gelsd(GELSD_PARAMS_t<ftyp> *params, fortran_int m, fortran_int n, - fortran_int nrhs) + fortran_int nrhs, +complex_trait) { +using frealtyp = basetype_t<ftyp>; npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; npy_uint8 *a, *b, *s, *work, *iwork, *rwork; @@ -3734,16 +3844,16 @@ init_@lapack_func@(GELSD_PARAMS_t *params, size_t safe_n = n; size_t safe_nrhs = nrhs; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); - size_t s_size = safe_min_m_n * sizeof(@frealtyp@); + size_t a_size = safe_m * safe_n * sizeof(ftyp); + size_t b_size = safe_max_m_n * safe_nrhs * sizeof(ftyp); + size_t s_size = safe_min_m_n * sizeof(frealtyp); fortran_int work_count; size_t work_size, rwork_size, iwork_size; fortran_int lda = fortran_int_max(1, m); fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); - mem_buff = malloc(a_size + b_size + s_size); + mem_buff = (npy_uint8 *)malloc(a_size + b_size + s_size); if (!mem_buff) goto error; @@ -3756,16 +3866,16 @@ init_@lapack_func@(GELSD_PARAMS_t *params, params->M = m; params->N = n; params->NRHS = nrhs; - params->A = a; - params->B = b; - params->S = s; + params->A = (ftyp*)a; + params->B = (ftyp*)b; + params->S = (frealtyp*)s; params->LDA = lda; params->LDB = ldb; { /* compute optimal work size */ - @ftyp@ work_size_query; - @frealtyp@ rwork_size_query; + ftyp work_size_query; + frealtyp rwork_size_query; fortran_int iwork_size_query; params->WORK = &work_size_query; @@ -3773,17 +3883,17 @@ init_@lapack_func@(GELSD_PARAMS_t *params, params->RWORK = &rwork_size_query; params->LWORK = -1; - if (call_@lapack_func@(params) != 0) + if (call_gelsd(params) != 0) goto error; work_count = (fortran_int)work_size_query.r; - work_size = (size_t )work_size_query.r * sizeof(@ftyp@); - rwork_size = (size_t)rwork_size_query * sizeof(@frealtyp@); + work_size = (size_t )work_size_query.r * sizeof(ftyp); + rwork_size = (size_t)rwork_size_query * sizeof(frealtyp); iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); } - mem_buff2 = malloc(work_size + rwork_size + iwork_size); + mem_buff2 = (npy_uint8 *)malloc(work_size + rwork_size + iwork_size); if (!mem_buff2) goto error; @@ -3791,9 +3901,9 @@ init_@lapack_func@(GELSD_PARAMS_t *params, rwork = work + work_size; iwork = rwork + rwork_size; - params->WORK = work; - params->RWORK = rwork; - params->IWORK = iwork; + params->WORK = (ftyp*)work; + params->RWORK = (frealtyp*)rwork; + params->IWORK = (fortran_int*)iwork; params->LWORK = work_count; return 1; @@ -3806,22 +3916,9 @@ init_@lapack_func@(GELSD_PARAMS_t *params, return 0; } -/**end repeat**/ - - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# - #lapack_func=sgelsd,dgelsd,cgelsd,zgelsd# - #dot_func=sdot,ddot,cdotc,zdotc# - #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# - #basetyp = npy_float, npy_double, npy_float, npy_double# - #ftyp = fortran_real, fortran_doublereal, - fortran_complex, fortran_doublecomplex# - #cmplx = 0, 0, 1, 1# - */ +template<typename ftyp> static inline void -release_@lapack_func@(GELSD_PARAMS_t* params) +release_gelsd(GELSD_PARAMS_t<ftyp>* params) { /* A and WORK contain allocated blocks */ free(params->A); @@ -3830,26 +3927,38 @@ release_@lapack_func@(GELSD_PARAMS_t* params) } /** Compute the squared l2 norm of a contiguous vector */ -static @basetyp@ -@TYPE@_abs2(@typ@ *p, npy_intp n) { +template<typename typ> +static basetype_t<typ> +abs2(typ *p, npy_intp n, scalar_trait) { npy_intp i; - @basetyp@ res = 0; + basetype_t<typ> res = 0; for (i = 0; i < n; i++) { - @typ@ el = p[i]; -#if @cmplx@ - res += el.real*el.real + el.imag*el.imag; -#else + typ el = p[i]; res += el*el; -#endif + } + return res; +} +template<typename typ> +static basetype_t<typ> +abs2(typ *p, npy_intp n, complex_trait) { + npy_intp i; + basetype_t<typ> res = 0; + for (i = 0; i < n; i++) { + typ el = p[i]; + res += el.real*el.real + el.imag*el.imag; } return res; } + +template<typename typ> static void -@TYPE@_lstsq(char **args, npy_intp const *dimensions, npy_intp const *steps, +lstsq(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { - GELSD_PARAMS_t params; +using ftyp = fortran_type_t<typ>; +using basetyp = basetype_t<typ>; + GELSD_PARAMS_t<ftyp> params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n, m, nrhs; fortran_int excess; @@ -3861,7 +3970,7 @@ static void nrhs = (fortran_int)dimensions[2]; excess = m - n; - if (init_@lapack_func@(¶ms, m, n, nrhs)) { + if (init_gelsd(¶ms, m, n, nrhs, dispatch_scalar<ftyp>{})) { LINEARIZE_DATA_t a_in, b_in, x_out, s_out, r_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); @@ -3872,52 +3981,51 @@ static void BEGIN_OUTER_LOOP_7 int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - linearize_@TYPE@_matrix(params.B, args[1], &b_in); - params.RCOND = args[2]; - not_ok = call_@lapack_func@(¶ms); + linearize_matrix((typ*)params.A, (typ*)args[0], &a_in); + linearize_matrix((typ*)params.B, (typ*)args[1], &b_in); + params.RCOND = (basetyp*)args[2]; + not_ok = call_gelsd(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[3], params.B, &x_out); + delinearize_matrix((typ*)args[3], (typ*)params.B, &x_out); *(npy_int*) args[5] = params.RANK; - delinearize_@REALTYPE@_matrix(args[6], params.S, &s_out); + delinearize_matrix((basetyp*)args[6], (basetyp*)params.S, &s_out); /* Note that linalg.lstsq discards this when excess == 0 */ if (excess >= 0 && params.RANK == n) { /* Compute the residuals as the square sum of each column */ int i; char *resid = args[4]; - @ftyp@ *components = (@ftyp@ *)params.B + n; + ftyp *components = (ftyp *)params.B + n; for (i = 0; i < nrhs; i++) { - @ftyp@ *vector = components + i*m; + ftyp *vector = components + i*m; /* Numpy and fortran floating types are the same size, * so this cast is safe */ - @basetyp@ abs2 = @TYPE@_abs2((@typ@ *)vector, excess); + basetyp abs = abs2((typ *)vector, excess, +dispatch_scalar<typ>{}); memcpy( resid + i*r_out.column_strides, - &abs2, sizeof(abs2)); + &abs, sizeof(abs)); } } else { /* Note that this is always discarded by linalg.lstsq */ - nan_@REALTYPE@_matrix(args[4], &r_out); + nan_matrix((basetyp*)args[4], &r_out); } } else { error_occurred = 1; - nan_@TYPE@_matrix(args[3], &x_out); - nan_@REALTYPE@_matrix(args[4], &r_out); + nan_matrix((typ*)args[3], &x_out); + nan_matrix((basetyp*)args[4], &r_out); *(npy_int*) args[5] = -1; - nan_@REALTYPE@_matrix(args[6], &s_out); + nan_matrix((basetyp*)args[6], &s_out); } END_OUTER_LOOP - release_@lapack_func@(¶ms); + release_gelsd(¶ms); } set_fp_invalid_or_clear(error_occurred); } -/**end repeat**/ - #pragma GCC diagnostic pop /* -------------------------------------------------------------------------- */ @@ -3962,6 +4070,22 @@ static void *array_of_nulls[] = { CFLOAT_ ## NAME, \ CDOUBLE_ ## NAME \ } +#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX_(NAME) \ + static PyUFuncGenericFunction \ + FUNC_ARRAY_NAME(NAME)[] = { \ + NAME<npy_float, npy_float>, \ + NAME<npy_double, npy_double>, \ + NAME<npy_cfloat, npy_float>, \ + NAME<npy_cdouble, npy_double> \ + } +#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(NAME) \ + static PyUFuncGenericFunction \ + FUNC_ARRAY_NAME(NAME)[] = { \ + NAME<npy_float>, \ + NAME<npy_double>, \ + NAME<npy_cfloat>, \ + NAME<npy_cdouble> \ + } /* There are problems with eig in complex single precision. * That kernel is disabled @@ -3969,9 +4093,9 @@ static void *array_of_nulls[] = { #define GUFUNC_FUNC_ARRAY_EIG(NAME) \ static PyUFuncGenericFunction \ FUNC_ARRAY_NAME(NAME)[] = { \ - FLOAT_ ## NAME, \ - DOUBLE_ ## NAME, \ - CDOUBLE_ ## NAME \ + NAME<fortran_complex,fortran_real>, \ + NAME<fortran_doublecomplex,fortran_doublereal>, \ + NAME<fortran_doublecomplex,fortran_doublecomplex> \ } /* The single precision functions are not used at all, @@ -3984,25 +4108,31 @@ static void *array_of_nulls[] = { DOUBLE_ ## NAME, \ CDOUBLE_ ## NAME \ } +#define GUFUNC_FUNC_ARRAY_QR__(NAME) \ + static PyUFuncGenericFunction \ + FUNC_ARRAY_NAME(NAME)[] = { \ + NAME<npy_double>, \ + NAME<npy_cdouble> \ + } -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A); -GUFUNC_FUNC_ARRAY_QR(qr_r_raw); -GUFUNC_FUNC_ARRAY_QR(qr_reduced); -GUFUNC_FUNC_ARRAY_QR(qr_complete); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(lstsq); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX_(slogdet); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX_(det); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eighlo); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eighup); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eigvalshlo); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(eigvalshup); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(solve); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(solve1); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(inv); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(cholesky_lo); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(svd_N); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(svd_S); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(svd_A); +GUFUNC_FUNC_ARRAY_QR__(qr_r_raw); +GUFUNC_FUNC_ARRAY_QR__(qr_reduced); +GUFUNC_FUNC_ARRAY_QR__(qr_complete); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX__(lstsq); GUFUNC_FUNC_ARRAY_EIG(eig); GUFUNC_FUNC_ARRAY_EIG(eigvals); @@ -4095,9 +4225,9 @@ static char lstsq_types[] = { }; typedef struct gufunc_descriptor_struct { - char *name; - char *signature; - char *doc; + const char *name; + const char *signature; + const char *doc; int ntypes; int nin; int nout; @@ -4402,7 +4532,6 @@ PyMODINIT_FUNC PyInit__umath_linalg(void) PyObject *d; PyObject *version; - init_constants(); m = PyModule_Create(&moduledef); if (m == NULL) { return NULL; diff --git a/tools/gitpod/settings.json b/tools/gitpod/settings.json index ea0775f68..50296336d 100644 --- a/tools/gitpod/settings.json +++ b/tools/gitpod/settings.json @@ -1,9 +1,8 @@ { - "restructuredtext.languageServer.disabled": true, - "restructuredtext.builtDocumentationPath": "${workspaceRoot}/doc/build/html", - "restructuredtext.confPath": "", "restructuredtext.updateOnTextChanged": "true", "restructuredtext.updateDelay": 300, - "restructuredtext.linter.disabled": true, - "python.defaultInterpreterPath": "/home/gitpod/mambaforge3/envs/numpy-dev/bin/python" + "restructuredtext.linter.disabledLinters": ["doc8","rst-lint", "rstcheck"], + "python.defaultInterpreterPath": "/home/gitpod/mambaforge3/envs/numpy-dev/bin/python", + "esbonio.sphinx.buildDir": "${workspaceRoot}/doc/build/html", + "esbonio.sphinx.confDir": "" }
\ No newline at end of file |
