diff options
author | czgdp1807 <gdp.1807@gmail.com> | 2021-06-09 08:39:38 +0530 |
---|---|---|
committer | czgdp1807 <gdp.1807@gmail.com> | 2021-06-09 08:39:38 +0530 |
commit | 03bfafc427f23f3335f41d541730df6e9cfc514c (patch) | |
tree | b81d8f52467231e260faff3a367f718dbd69425f | |
parent | 38fcfc226a621769bbf8154e9e789c1e7a5a77c8 (diff) | |
download | numpy-03bfafc427f23f3335f41d541730df6e9cfc514c.tar.gz |
shifted qr code in umath_linalg.c.src above
-rw-r--r-- | numpy/linalg/umath_linalg.c.src | 851 |
1 files changed, 425 insertions, 426 deletions
diff --git a/numpy/linalg/umath_linalg.c.src b/numpy/linalg/umath_linalg.c.src index 8cb385518..3d74db210 100644 --- a/numpy/linalg/umath_linalg.c.src +++ b/numpy/linalg/umath_linalg.c.src @@ -2868,170 +2868,133 @@ static void /**end repeat**/ - /* -------------------------------------------------------------------------- */ - /* least squares */ + /* qr (modes - r, raw) */ -typedef struct gelsd_params_struct +typedef struct geqfr_params_struct { fortran_int M; fortran_int N; - fortran_int NRHS; void *A; fortran_int LDA; - void *B; - fortran_int LDB; - void *S; - void *RCOND; - fortran_int RANK; + void* TAU; void *WORK; fortran_int LWORK; - void *RWORK; - void *IWORK; -} GELSD_PARAMS_t; +} GEQRF_PARAMS_t; static inline void -dump_gelsd_params(const char *name, - GELSD_PARAMS_t *params) +dump_geqrf_params(const char *name, + GEQRF_PARAMS_t *params) { TRACE_TXT("\n%s:\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ "%14s: %18p\n"\ - "%14s: %18p\n"\ - "%14s: %18p\n"\ - "%14s: %18p\n"\ - - "%14s: %18d\n"\ - "%14s: %18d\n"\ - "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ - "%14s: %18d\n"\ - - "%14s: %18p\n", + "%14s: %18d\n", name, "A", params->A, - "B", params->B, - "S", params->S, + "TAU", params->TAU, "WORK", params->WORK, - "RWORK", params->RWORK, - "IWORK", params->IWORK, "M", (int)params->M, "N", (int)params->N, - "NRHS", (int)params->NRHS, "LDA", (int)params->LDA, - "LDB", (int)params->LDB, - "LWORK", (int)params->LWORK, - "RANK", (int)params->RANK, - - "RCOND", params->RCOND); + "LWORK", (int)params->LWORK); } - /**begin repeat - #TYPE=FLOAT,DOUBLE# - #lapack_func=sgelsd,dgelsd# - #ftyp=fortran_real,fortran_doublereal# + #lapack_func=sgeqrf,dgeqrf,cgeqrf,zgeqrf# */ static inline fortran_int -call_@lapack_func@(GELSD_PARAMS_t *params) +call_@lapack_func@(GEQRF_PARAMS_t *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, + LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, params->A, ¶ms->LDA, - params->B, ¶ms->LDB, - params->S, - params->RCOND, ¶ms->RANK, + params->TAU, params->WORK, ¶ms->LWORK, - params->IWORK, &rv); return rv; } +/**end repeat**/ + +/**begin repeat + #TYPE=FLOAT,DOUBLE# + #lapack_func=sgeqrf,dgeqrf# + #ftyp=fortran_real,fortran_doublereal# + */ static inline int -init_@lapack_func@(GELSD_PARAMS_t *params, +init_@lapack_func@(GEQRF_PARAMS_t *params, fortran_int m, - fortran_int n, - fortran_int nrhs) + fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; - npy_uint8 *a, *b, *s, *work, *iwork; + npy_uint8 *a, *tau, *work; fortran_int min_m_n = fortran_int_min(m, n); - fortran_int max_m_n = fortran_int_max(m, n); size_t safe_min_m_n = min_m_n; - size_t safe_max_m_n = max_m_n; size_t safe_m = m; size_t safe_n = n; - size_t safe_nrhs = nrhs; size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); - size_t s_size = safe_min_m_n * sizeof(@ftyp@); + size_t tau_size = safe_min_m_n * sizeof(@ftyp@); fortran_int work_count; size_t work_size; - size_t iwork_size; fortran_int lda = fortran_int_max(1, m); - fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); - mem_buff = malloc(a_size + b_size + s_size); + mem_buff = malloc(a_size + tau_size); if (!mem_buff) goto error; a = mem_buff; - b = a + a_size; - s = b + b_size; + tau = a + a_size; + memset(tau, 0, tau_size); params->M = m; params->N = n; - params->NRHS = nrhs; params->A = a; - params->B = b; - params->S = s; + params->TAU = tau; params->LDA = lda; - params->LDB = ldb; { /* compute optimal work size */ + @ftyp@ work_size_query; - fortran_int iwork_size_query; params->WORK = &work_size_query; - params->IWORK = &iwork_size_query; - params->RWORK = NULL; params->LWORK = -1; if (call_@lapack_func@(params) != 0) goto error; - work_count = (fortran_int)work_size_query; + work_count = (fortran_int) *(@ftyp@*) params->WORK; - work_size = (size_t) work_size_query * sizeof(@ftyp@); - iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); } - mem_buff2 = malloc(work_size + iwork_size); + params->LWORK = fortran_int_max(fortran_int_max(1, n), + work_count); + + work_size = (size_t) params->LWORK * sizeof(@ftyp@); + mem_buff2 = malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; - iwork = work + work_size; + memset(work, 0, work_size); params->WORK = work; - params->RWORK = NULL; - params->IWORK = iwork; - params->LWORK = work_count; return 1; error: @@ -3047,105 +3010,73 @@ init_@lapack_func@(GELSD_PARAMS_t *params, /**begin repeat #TYPE=CFLOAT,CDOUBLE# + #lapack_func=cgeqrf,zgeqrf# #ftyp=fortran_complex,fortran_doublecomplex# - #frealtyp=fortran_real,fortran_doublereal# - #typ=COMPLEX_t,DOUBLECOMPLEX_t# - #lapack_func=cgelsd,zgelsd# */ - -static inline fortran_int -call_@lapack_func@(GELSD_PARAMS_t *params) -{ - fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, - params->A, ¶ms->LDA, - params->B, ¶ms->LDB, - params->S, - params->RCOND, ¶ms->RANK, - params->WORK, ¶ms->LWORK, - params->RWORK, params->IWORK, - &rv); - return rv; -} - static inline int -init_@lapack_func@(GELSD_PARAMS_t *params, +init_@lapack_func@(GEQRF_PARAMS_t *params, fortran_int m, - fortran_int n, - fortran_int nrhs) + fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; - npy_uint8 *a, *b, *s, *work, *iwork, *rwork; + npy_uint8 *a, *tau, *work; fortran_int min_m_n = fortran_int_min(m, n); - fortran_int max_m_n = fortran_int_max(m, n); size_t safe_min_m_n = min_m_n; - size_t safe_max_m_n = max_m_n; size_t safe_m = m; size_t safe_n = n; - size_t safe_nrhs = nrhs; size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); - size_t s_size = safe_min_m_n * sizeof(@frealtyp@); + size_t tau_size = safe_min_m_n * sizeof(@ftyp@); fortran_int work_count; - size_t work_size, rwork_size, iwork_size; + size_t work_size; fortran_int lda = fortran_int_max(1, m); - fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); - mem_buff = malloc(a_size + b_size + s_size); + mem_buff = malloc(a_size + tau_size); if (!mem_buff) goto error; a = mem_buff; - b = a + a_size; - s = b + b_size; + tau = a + a_size; + memset(tau, 0, tau_size); params->M = m; params->N = n; - params->NRHS = nrhs; params->A = a; - params->B = b; - params->S = s; + params->TAU = tau; params->LDA = lda; - params->LDB = ldb; { /* compute optimal work size */ + @ftyp@ work_size_query; - @frealtyp@ rwork_size_query; - fortran_int iwork_size_query; params->WORK = &work_size_query; - params->IWORK = &iwork_size_query; - params->RWORK = &rwork_size_query; params->LWORK = -1; if (call_@lapack_func@(params) != 0) goto error; - work_count = (fortran_int)work_size_query.r; + work_count = (fortran_int) ((@ftyp@*)params->WORK)->r; - work_size = (size_t )work_size_query.r * sizeof(@ftyp@); - rwork_size = (size_t)rwork_size_query * sizeof(@frealtyp@); - iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); } - mem_buff2 = malloc(work_size + rwork_size + iwork_size); + params->LWORK = fortran_int_max(fortran_int_max(1, n), + work_count); + + work_size = (size_t) params->LWORK * sizeof(@ftyp@); + + mem_buff2 = malloc(work_size); if (!mem_buff2) goto error; work = mem_buff2; - rwork = work + work_size; - iwork = rwork + rwork_size; + memset(work, 0, work_size); params->WORK = work; - params->RWORK = rwork; - params->IWORK = iwork; - params->LWORK = work_count; return 1; error: @@ -3159,20 +3090,11 @@ init_@lapack_func@(GELSD_PARAMS_t *params, /**end repeat**/ - /**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# - #lapack_func=sgelsd,dgelsd,cgelsd,zgelsd# - #dot_func=sdot,ddot,cdotc,zdotc# - #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# - #basetyp = npy_float, npy_double, npy_float, npy_double# - #ftyp = fortran_real, fortran_doublereal, - fortran_complex, fortran_doublecomplex# - #cmplx = 0, 0, 1, 1# + #lapack_func=sgeqrf,dgeqrf,cgeqrf,zgeqrf# */ static inline void -release_@lapack_func@(GELSD_PARAMS_t* params) +release_@lapack_func@(GEQRF_PARAMS_t* params) { /* A and WORK contain allocated blocks */ free(params->A); @@ -3180,84 +3102,48 @@ release_@lapack_func@(GELSD_PARAMS_t* params) memset(params, 0, sizeof(*params)); } -/** Compute the squared l2 norm of a contiguous vector */ -static @basetyp@ -@TYPE@_abs2(@typ@ *p, npy_intp n) { - npy_intp i; - @basetyp@ res = 0; - for (i = 0; i < n; i++) { - @typ@ el = p[i]; -#if @cmplx@ - res += el.real*el.real + el.imag*el.imag; -#else - res += el*el; -#endif - } - return res; -} +/**end repeat**/ +/**begin repeat + #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# + #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# + #lapack_func=sgeqrf,dgeqrf,cgeqrf,zgeqrf# + #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# + #basetyp = npy_float, npy_double, npy_float, npy_double# + #ftyp = fortran_real, fortran_doublereal, + fortran_complex, fortran_doublecomplex# + #cmplx = 0, 0, 1, 1# + */ static void -@TYPE@_lstsq(char **args, npy_intp const *dimensions, npy_intp const *steps, - void *NPY_UNUSED(func)) +@TYPE@_qr_r_raw(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) { - GELSD_PARAMS_t params; + GEQRF_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); - fortran_int n, m, nrhs; - fortran_int excess; + fortran_int n, m; - INIT_OUTER_LOOP_7 + INIT_OUTER_LOOP_2 m = (fortran_int)dimensions[0]; n = (fortran_int)dimensions[1]; - nrhs = (fortran_int)dimensions[2]; - excess = m - n; - if (init_@lapack_func@(¶ms, m, n, nrhs)) { - LINEARIZE_DATA_t a_in, b_in, x_out, s_out, r_out; + if (init_@lapack_func@(¶ms, m, n)) { + LINEARIZE_DATA_t a_in, tau_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); - init_linearize_data_ex(&b_in, nrhs, m, steps[3], steps[2], fortran_int_max(n, m)); - init_linearize_data_ex(&x_out, nrhs, n, steps[5], steps[4], fortran_int_max(n, m)); - init_linearize_data(&r_out, 1, nrhs, 1, steps[6]); - init_linearize_data(&s_out, 1, fortran_int_min(n, m), 1, steps[7]); + init_linearize_data(&tau_out, 1, fortran_int_min(m, n), 1, steps[2]); - BEGIN_OUTER_LOOP_7 + BEGIN_OUTER_LOOP_2 int not_ok; linearize_@TYPE@_matrix(params.A, args[0], &a_in); - linearize_@TYPE@_matrix(params.B, args[1], &b_in); - params.RCOND = args[2]; not_ok = call_@lapack_func@(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[3], params.B, &x_out); - *(npy_int*) args[5] = params.RANK; - delinearize_@REALTYPE@_matrix(args[6], params.S, &s_out); - - /* Note that linalg.lstsq discards this when excess == 0 */ - if (excess >= 0 && params.RANK == n) { - /* Compute the residuals as the square sum of each column */ - int i; - char *resid = args[4]; - @ftyp@ *components = (@ftyp@ *)params.B + n; - for (i = 0; i < nrhs; i++) { - @ftyp@ *vector = components + i*m; - /* Numpy and fortran floating types are the same size, - * so this cast is safe */ - @basetyp@ abs2 = @TYPE@_abs2((@typ@ *)vector, excess); - memcpy( - resid + i*r_out.column_strides, - &abs2, sizeof(abs2)); - } - } - else { - /* Note that this is always discarded by linalg.lstsq */ - nan_@REALTYPE@_matrix(args[4], &r_out); - } + delinearize_@TYPE@_matrix(args[0], params.A, &a_in); + delinearize_@TYPE@_matrix(args[1], params.TAU, &tau_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[3], &x_out); - nan_@REALTYPE@_matrix(args[4], &r_out); - *(npy_int*) args[5] = -1; - nan_@REALTYPE@_matrix(args[6], &s_out); + nan_@TYPE@_matrix(args[0], &a_in); + nan_@TYPE@_matrix(args[1], &tau_out); } END_OUTER_LOOP @@ -3270,23 +3156,25 @@ static void /**end repeat**/ /* -------------------------------------------------------------------------- */ - /* qr (modes - r, raw) */ + /* qr (modes - reduced) */ -typedef struct geqfr_params_struct +typedef struct gqr_params_struct { fortran_int M; - fortran_int N; - void *A; + fortran_int MC; + fortran_int MN; + void* A; + void *Q; fortran_int LDA; void* TAU; void *WORK; fortran_int LWORK; -} GEQRF_PARAMS_t; +} GQR_PARAMS_t; static inline void -dump_geqrf_params(const char *name, - GEQRF_PARAMS_t *params) +dump_gqr_params(const char *name, + GQR_PARAMS_t *params) { TRACE_TXT("\n%s:\n"\ @@ -3296,30 +3184,32 @@ dump_geqrf_params(const char *name, "%14s: %18d\n"\ "%14s: %18d\n"\ "%14s: %18d\n"\ + "%14s: %18d\n"\ "%14s: %18d\n", name, - "A", params->A, + "Q", params->Q, "TAU", params->TAU, "WORK", params->WORK, "M", (int)params->M, - "N", (int)params->N, + "MC", (int)params->MC, + "MN", (int)params->MN, "LDA", (int)params->LDA, "LWORK", (int)params->LWORK); } /**begin repeat - #lapack_func=sgeqrf,dgeqrf,cgeqrf,zgeqrf# + #lapack_func=sorgqr,dorgqr,cungqr,zungqr# */ static inline fortran_int -call_@lapack_func@(GEQRF_PARAMS_t *params) +call_@lapack_func@(GQR_PARAMS_t *params) { fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, - params->A, ¶ms->LDA, + LAPACK(@lapack_func@)(¶ms->M, ¶ms->MC, ¶ms->MN, + params->Q, ¶ms->LDA, params->TAU, params->WORK, ¶ms->LWORK, &rv); @@ -3330,48 +3220,49 @@ call_@lapack_func@(GEQRF_PARAMS_t *params) /**begin repeat #TYPE=FLOAT,DOUBLE# - #lapack_func=sgeqrf,dgeqrf# + #lapack_func=sorgqr,dorgqr# #ftyp=fortran_real,fortran_doublereal# */ static inline int -init_@lapack_func@(GEQRF_PARAMS_t *params, +init_@lapack_func@(GQR_PARAMS_t *params, fortran_int m, fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; - npy_uint8 *a, *tau, *work; + npy_uint8 *a, *q, *tau, *work; fortran_int min_m_n = fortran_int_min(m, n); size_t safe_min_m_n = min_m_n; size_t safe_m = m; size_t safe_n = n; - size_t a_size = safe_m * safe_n * sizeof(@ftyp@); + size_t q_size = safe_m * safe_min_m_n * sizeof(@ftyp@); size_t tau_size = safe_min_m_n * sizeof(@ftyp@); fortran_int work_count; size_t work_size; fortran_int lda = fortran_int_max(1, m); - mem_buff = malloc(a_size + tau_size); + mem_buff = malloc(q_size + tau_size + a_size); if (!mem_buff) goto error; - a = mem_buff; - tau = a + a_size; - memset(tau, 0, tau_size); + q = mem_buff; + tau = q + q_size; + a = tau + tau_size; params->M = m; - params->N = n; + params->MC = min_m_n; + params->MN = min_m_n; params->A = a; + params->Q = q; params->TAU = tau; params->LDA = lda; { /* compute optimal work size */ - @ftyp@ work_size_query; params->WORK = &work_size_query; @@ -3388,6 +3279,7 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, work_count); work_size = (size_t) params->LWORK * sizeof(@ftyp@); + mem_buff2 = malloc(work_size); if (!mem_buff2) goto error; @@ -3411,48 +3303,50 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, /**begin repeat #TYPE=CFLOAT,CDOUBLE# - #lapack_func=cgeqrf,zgeqrf# + #lapack_func=cungqr,zungqr# #ftyp=fortran_complex,fortran_doublecomplex# */ static inline int -init_@lapack_func@(GEQRF_PARAMS_t *params, +init_@lapack_func@(GQR_PARAMS_t *params, fortran_int m, fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; - npy_uint8 *a, *tau, *work; + npy_uint8 *a, *q, *tau, *work; fortran_int min_m_n = fortran_int_min(m, n); size_t safe_min_m_n = min_m_n; size_t safe_m = m; size_t safe_n = n; size_t a_size = safe_m * safe_n * sizeof(@ftyp@); + size_t q_size = safe_m * safe_min_m_n * sizeof(@ftyp@); size_t tau_size = safe_min_m_n * sizeof(@ftyp@); fortran_int work_count; size_t work_size; fortran_int lda = fortran_int_max(1, m); - mem_buff = malloc(a_size + tau_size); + mem_buff = malloc(q_size + tau_size + a_size); if (!mem_buff) goto error; - a = mem_buff; - tau = a + a_size; - memset(tau, 0, tau_size); + q = mem_buff; + tau = q + q_size; + a = tau + tau_size; params->M = m; - params->N = n; + params->MC = min_m_n; + params->MN = min_m_n; params->A = a; + params->Q = q; params->TAU = tau; params->LDA = lda; { /* compute optimal work size */ - @ftyp@ work_size_query; params->WORK = &work_size_query; @@ -3478,6 +3372,7 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, memset(work, 0, work_size); params->WORK = work; + params->LWORK = work_count; return 1; error: @@ -3492,13 +3387,13 @@ init_@lapack_func@(GEQRF_PARAMS_t *params, /**end repeat**/ /**begin repeat - #lapack_func=sgeqrf,dgeqrf,cgeqrf,zgeqrf# + #lapack_func=sorgqr,dorgqr,cungqr,zungqr# */ static inline void -release_@lapack_func@(GEQRF_PARAMS_t* params) +release_@lapack_func@(GQR_PARAMS_t* params) { /* A and WORK contain allocated blocks */ - free(params->A); + free(params->Q); free(params->WORK); memset(params, 0, sizeof(*params)); } @@ -3508,7 +3403,7 @@ release_@lapack_func@(GEQRF_PARAMS_t* params) /**begin repeat #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# - #lapack_func=sgeqrf,dgeqrf,cgeqrf,zgeqrf# + #lapack_func=sorgqr,dorgqr,cungqr,zungqr# #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# #basetyp = npy_float, npy_double, npy_float, npy_double# #ftyp = fortran_real, fortran_doublereal, @@ -3516,35 +3411,41 @@ release_@lapack_func@(GEQRF_PARAMS_t* params) #cmplx = 0, 0, 1, 1# */ static void -@TYPE@_qr_r_raw(char **args, npy_intp const *dimensions, npy_intp const *steps, - void *NPY_UNUSED(func)) +@TYPE@_qr_reduced(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) { - GEQRF_PARAMS_t params; + GQR_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n, m; + size_t safe_min_m_n, safe_m; - INIT_OUTER_LOOP_2 + INIT_OUTER_LOOP_3 m = (fortran_int)dimensions[0]; n = (fortran_int)dimensions[1]; + safe_min_m_n = fortran_int_min(m, n); + safe_m = m; if (init_@lapack_func@(¶ms, m, n)) { - LINEARIZE_DATA_t a_in, tau_out; + LINEARIZE_DATA_t a_in, tau_in, q_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); - init_linearize_data(&tau_out, 1, fortran_int_min(m, n), 1, steps[2]); + init_linearize_data(&tau_in, 1, fortran_int_min(m, n), 1, steps[2]); + init_linearize_data(&q_out, fortran_int_min(m, n), m, steps[4], steps[3]); - BEGIN_OUTER_LOOP_2 + BEGIN_OUTER_LOOP_3 int not_ok; linearize_@TYPE@_matrix(params.A, args[0], &a_in); + memcpy(params.Q, params.A, safe_min_m_n * safe_m * sizeof(@ftyp@)); + linearize_@TYPE@_matrix(params.TAU, args[1], &tau_in); not_ok = call_@lapack_func@(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[0], params.A, &a_in); - delinearize_@TYPE@_matrix(args[1], params.TAU, &tau_out); + delinearize_@TYPE@_matrix(args[1], params.TAU, &tau_in); + delinearize_@TYPE@_matrix(args[2], params.Q, &q_out); } else { error_occurred = 1; - nan_@TYPE@_matrix(args[0], &a_in); - nan_@TYPE@_matrix(args[1], &tau_out); + nan_@TYPE@_matrix(args[1], &tau_in); + nan_@TYPE@_matrix(args[2], &q_out); } END_OUTER_LOOP @@ -3557,67 +3458,7 @@ static void /**end repeat**/ /* -------------------------------------------------------------------------- */ - /* qr (modes - reduced) */ - -typedef struct gqr_params_struct -{ - fortran_int M; - fortran_int MC; - fortran_int MN; - void* A; - void *Q; - fortran_int LDA; - void* TAU; - void *WORK; - fortran_int LWORK; -} GQR_PARAMS_t; - - -static inline void -dump_gqr_params(const char *name, - GQR_PARAMS_t *params) -{ - TRACE_TXT("\n%s:\n"\ - - "%14s: %18p\n"\ - "%14s: %18p\n"\ - "%14s: %18p\n"\ - "%14s: %18d\n"\ - "%14s: %18d\n"\ - "%14s: %18d\n"\ - "%14s: %18d\n"\ - "%14s: %18d\n", - - name, - - "Q", params->Q, - "TAU", params->TAU, - "WORK", params->WORK, - - "M", (int)params->M, - "MC", (int)params->MC, - "MN", (int)params->MN, - "LDA", (int)params->LDA, - "LWORK", (int)params->LWORK); -} - -/**begin repeat - #lapack_func=sorgqr,dorgqr,cungqr,zungqr# - */ - -static inline fortran_int -call_@lapack_func@(GQR_PARAMS_t *params) -{ - fortran_int rv; - LAPACK(@lapack_func@)(¶ms->M, ¶ms->MC, ¶ms->MN, - params->Q, ¶ms->LDA, - params->TAU, - params->WORK, ¶ms->LWORK, - &rv); - return rv; -} - -/**end repeat**/ + /* qr (modes - complete) */ /**begin repeat #TYPE=FLOAT,DOUBLE# @@ -3625,7 +3466,7 @@ call_@lapack_func@(GQR_PARAMS_t *params) #ftyp=fortran_real,fortran_doublereal# */ static inline int -init_@lapack_func@(GQR_PARAMS_t *params, +init_@lapack_func@_complete(GQR_PARAMS_t *params, fortran_int m, fortran_int n) { @@ -3636,8 +3477,9 @@ init_@lapack_func@(GQR_PARAMS_t *params, size_t safe_min_m_n = min_m_n; size_t safe_m = m; size_t safe_n = n; + size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t q_size = safe_m * safe_min_m_n * sizeof(@ftyp@); + size_t q_size = safe_m * safe_m * sizeof(@ftyp@); size_t tau_size = safe_min_m_n * sizeof(@ftyp@); fortran_int work_count; @@ -3655,7 +3497,7 @@ init_@lapack_func@(GQR_PARAMS_t *params, params->M = m; - params->MC = min_m_n; + params->MC = m; params->MN = min_m_n; params->A = a; params->Q = q; @@ -3708,9 +3550,9 @@ init_@lapack_func@(GQR_PARAMS_t *params, #ftyp=fortran_complex,fortran_doublecomplex# */ static inline int -init_@lapack_func@(GQR_PARAMS_t *params, - fortran_int m, - fortran_int n) +init_@lapack_func@_complete(GQR_PARAMS_t *params, + fortran_int m, + fortran_int n) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; @@ -3721,7 +3563,7 @@ init_@lapack_func@(GQR_PARAMS_t *params, size_t safe_n = n; size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t q_size = safe_m * safe_min_m_n * sizeof(@ftyp@); + size_t q_size = safe_m * safe_m * sizeof(@ftyp@); size_t tau_size = safe_min_m_n * sizeof(@ftyp@); fortran_int work_count; @@ -3739,7 +3581,7 @@ init_@lapack_func@(GQR_PARAMS_t *params, params->M = m; - params->MC = min_m_n; + params->MC = m; params->MN = min_m_n; params->A = a; params->Q = q; @@ -3764,7 +3606,6 @@ init_@lapack_func@(GQR_PARAMS_t *params, work_count); work_size = (size_t) params->LWORK * sizeof(@ftyp@); - mem_buff2 = malloc(work_size); if (!mem_buff2) goto error; @@ -3788,20 +3629,6 @@ init_@lapack_func@(GQR_PARAMS_t *params, /**end repeat**/ /**begin repeat - #lapack_func=sorgqr,dorgqr,cungqr,zungqr# - */ -static inline void -release_@lapack_func@(GQR_PARAMS_t* params) -{ - /* A and WORK contain allocated blocks */ - free(params->Q); - free(params->WORK); - memset(params, 0, sizeof(*params)); -} - -/**end repeat**/ - -/**begin repeat #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# #lapack_func=sorgqr,dorgqr,cungqr,zungqr# @@ -3812,32 +3639,33 @@ release_@lapack_func@(GQR_PARAMS_t* params) #cmplx = 0, 0, 1, 1# */ static void -@TYPE@_qr_reduced(char **args, npy_intp const *dimensions, npy_intp const *steps, +@TYPE@_qr_complete(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { GQR_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); fortran_int n, m; - size_t safe_min_m_n, safe_m; + size_t safe_n, safe_m; INIT_OUTER_LOOP_3 m = (fortran_int)dimensions[0]; n = (fortran_int)dimensions[1]; - safe_min_m_n = fortran_int_min(m, n); + safe_n = n; safe_m = m; - if (init_@lapack_func@(¶ms, m, n)) { + + if (init_@lapack_func@_complete(¶ms, m, n)) { LINEARIZE_DATA_t a_in, tau_in, q_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); init_linearize_data(&tau_in, 1, fortran_int_min(m, n), 1, steps[2]); - init_linearize_data(&q_out, fortran_int_min(m, n), m, steps[4], steps[3]); + init_linearize_data(&q_out, m, m, steps[4], steps[3]); BEGIN_OUTER_LOOP_3 int not_ok; linearize_@TYPE@_matrix(params.A, args[0], &a_in); - memcpy(params.Q, params.A, safe_min_m_n * safe_m * sizeof(@ftyp@)); + memcpy(params.Q, params.A, safe_n * safe_m * sizeof(@ftyp@)); linearize_@TYPE@_matrix(params.TAU, args[1], &tau_in); not_ok = call_@lapack_func@(¶ms); if (!not_ok) { @@ -3859,79 +3687,168 @@ static void /**end repeat**/ /* -------------------------------------------------------------------------- */ - /* qr (modes - complete) */ + /* least squares */ + +typedef struct gelsd_params_struct +{ + fortran_int M; + fortran_int N; + fortran_int NRHS; + void *A; + fortran_int LDA; + void *B; + fortran_int LDB; + void *S; + void *RCOND; + fortran_int RANK; + void *WORK; + fortran_int LWORK; + void *RWORK; + void *IWORK; +} GELSD_PARAMS_t; + + +static inline void +dump_gelsd_params(const char *name, + GELSD_PARAMS_t *params) +{ + TRACE_TXT("\n%s:\n"\ + + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + "%14s: %18p\n"\ + + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + "%14s: %18d\n"\ + + "%14s: %18p\n", + + name, + + "A", params->A, + "B", params->B, + "S", params->S, + "WORK", params->WORK, + "RWORK", params->RWORK, + "IWORK", params->IWORK, + + "M", (int)params->M, + "N", (int)params->N, + "NRHS", (int)params->NRHS, + "LDA", (int)params->LDA, + "LDB", (int)params->LDB, + "LWORK", (int)params->LWORK, + "RANK", (int)params->RANK, + + "RCOND", params->RCOND); +} + /**begin repeat #TYPE=FLOAT,DOUBLE# - #lapack_func=sorgqr,dorgqr# + #lapack_func=sgelsd,dgelsd# #ftyp=fortran_real,fortran_doublereal# */ + +static inline fortran_int +call_@lapack_func@(GELSD_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->B, ¶ms->LDB, + params->S, + params->RCOND, ¶ms->RANK, + params->WORK, ¶ms->LWORK, + params->IWORK, + &rv); + return rv; +} + static inline int -init_@lapack_func@_complete(GQR_PARAMS_t *params, +init_@lapack_func@(GELSD_PARAMS_t *params, fortran_int m, - fortran_int n) + fortran_int n, + fortran_int nrhs) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; - npy_uint8 *a, *q, *tau, *work; + npy_uint8 *a, *b, *s, *work, *iwork; fortran_int min_m_n = fortran_int_min(m, n); + fortran_int max_m_n = fortran_int_max(m, n); size_t safe_min_m_n = min_m_n; + size_t safe_max_m_n = max_m_n; size_t safe_m = m; size_t safe_n = n; + size_t safe_nrhs = nrhs; size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t q_size = safe_m * safe_m * sizeof(@ftyp@); - size_t tau_size = safe_min_m_n * sizeof(@ftyp@); + size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); + size_t s_size = safe_min_m_n * sizeof(@ftyp@); fortran_int work_count; size_t work_size; + size_t iwork_size; fortran_int lda = fortran_int_max(1, m); + fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); - mem_buff = malloc(q_size + tau_size + a_size); + mem_buff = malloc(a_size + b_size + s_size); if (!mem_buff) goto error; - q = mem_buff; - tau = q + q_size; - a = tau + tau_size; + a = mem_buff; + b = a + a_size; + s = b + b_size; params->M = m; - params->MC = m; - params->MN = min_m_n; + params->N = n; + params->NRHS = nrhs; params->A = a; - params->Q = q; - params->TAU = tau; + params->B = b; + params->S = s; params->LDA = lda; + params->LDB = ldb; { /* compute optimal work size */ @ftyp@ work_size_query; + fortran_int iwork_size_query; params->WORK = &work_size_query; + params->IWORK = &iwork_size_query; + params->RWORK = NULL; params->LWORK = -1; if (call_@lapack_func@(params) != 0) goto error; - work_count = (fortran_int) *(@ftyp@*) params->WORK; + work_count = (fortran_int)work_size_query; + work_size = (size_t) work_size_query * sizeof(@ftyp@); + iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); } - params->LWORK = fortran_int_max(fortran_int_max(1, n), - work_count); - - work_size = (size_t) params->LWORK * sizeof(@ftyp@); - - mem_buff2 = malloc(work_size); + mem_buff2 = malloc(work_size + iwork_size); if (!mem_buff2) goto error; work = mem_buff2; - memset(work, 0, work_size); + iwork = work + work_size; params->WORK = work; + params->RWORK = NULL; + params->IWORK = iwork; + params->LWORK = work_count; return 1; error: @@ -3947,74 +3864,104 @@ init_@lapack_func@_complete(GQR_PARAMS_t *params, /**begin repeat #TYPE=CFLOAT,CDOUBLE# - #lapack_func=cungqr,zungqr# #ftyp=fortran_complex,fortran_doublecomplex# + #frealtyp=fortran_real,fortran_doublereal# + #typ=COMPLEX_t,DOUBLECOMPLEX_t# + #lapack_func=cgelsd,zgelsd# */ + +static inline fortran_int +call_@lapack_func@(GELSD_PARAMS_t *params) +{ + fortran_int rv; + LAPACK(@lapack_func@)(¶ms->M, ¶ms->N, ¶ms->NRHS, + params->A, ¶ms->LDA, + params->B, ¶ms->LDB, + params->S, + params->RCOND, ¶ms->RANK, + params->WORK, ¶ms->LWORK, + params->RWORK, params->IWORK, + &rv); + return rv; +} + static inline int -init_@lapack_func@_complete(GQR_PARAMS_t *params, - fortran_int m, - fortran_int n) +init_@lapack_func@(GELSD_PARAMS_t *params, + fortran_int m, + fortran_int n, + fortran_int nrhs) { npy_uint8 *mem_buff = NULL; npy_uint8 *mem_buff2 = NULL; - npy_uint8 *a, *q, *tau, *work; + npy_uint8 *a, *b, *s, *work, *iwork, *rwork; fortran_int min_m_n = fortran_int_min(m, n); + fortran_int max_m_n = fortran_int_max(m, n); size_t safe_min_m_n = min_m_n; + size_t safe_max_m_n = max_m_n; size_t safe_m = m; size_t safe_n = n; + size_t safe_nrhs = nrhs; size_t a_size = safe_m * safe_n * sizeof(@ftyp@); - size_t q_size = safe_m * safe_m * sizeof(@ftyp@); - size_t tau_size = safe_min_m_n * sizeof(@ftyp@); + size_t b_size = safe_max_m_n * safe_nrhs * sizeof(@ftyp@); + size_t s_size = safe_min_m_n * sizeof(@frealtyp@); fortran_int work_count; - size_t work_size; + size_t work_size, rwork_size, iwork_size; fortran_int lda = fortran_int_max(1, m); + fortran_int ldb = fortran_int_max(1, fortran_int_max(m,n)); - mem_buff = malloc(q_size + tau_size + a_size); + mem_buff = malloc(a_size + b_size + s_size); if (!mem_buff) goto error; - q = mem_buff; - tau = q + q_size; - a = tau + tau_size; + a = mem_buff; + b = a + a_size; + s = b + b_size; params->M = m; - params->MC = m; - params->MN = min_m_n; + params->N = n; + params->NRHS = nrhs; params->A = a; - params->Q = q; - params->TAU = tau; + params->B = b; + params->S = s; params->LDA = lda; + params->LDB = ldb; { /* compute optimal work size */ @ftyp@ work_size_query; + @frealtyp@ rwork_size_query; + fortran_int iwork_size_query; params->WORK = &work_size_query; + params->IWORK = &iwork_size_query; + params->RWORK = &rwork_size_query; params->LWORK = -1; if (call_@lapack_func@(params) != 0) goto error; - work_count = (fortran_int) ((@ftyp@*)params->WORK)->r; + work_count = (fortran_int)work_size_query.r; + work_size = (size_t )work_size_query.r * sizeof(@ftyp@); + rwork_size = (size_t)rwork_size_query * sizeof(@frealtyp@); + iwork_size = (size_t)iwork_size_query * sizeof(fortran_int); } - params->LWORK = fortran_int_max(fortran_int_max(1, n), - work_count); - - work_size = (size_t) params->LWORK * sizeof(@ftyp@); - mem_buff2 = malloc(work_size); + mem_buff2 = malloc(work_size + rwork_size + iwork_size); if (!mem_buff2) goto error; work = mem_buff2; - memset(work, 0, work_size); + rwork = work + work_size; + iwork = rwork + rwork_size; params->WORK = work; + params->RWORK = rwork; + params->IWORK = iwork; params->LWORK = work_count; return 1; @@ -4029,53 +3976,105 @@ init_@lapack_func@_complete(GQR_PARAMS_t *params, /**end repeat**/ + /**begin repeat #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE# - #lapack_func=sorgqr,dorgqr,cungqr,zungqr# + #lapack_func=sgelsd,dgelsd,cgelsd,zgelsd# + #dot_func=sdot,ddot,cdotc,zdotc# #typ = npy_float, npy_double, npy_cfloat, npy_cdouble# #basetyp = npy_float, npy_double, npy_float, npy_double# #ftyp = fortran_real, fortran_doublereal, fortran_complex, fortran_doublecomplex# #cmplx = 0, 0, 1, 1# */ +static inline void +release_@lapack_func@(GELSD_PARAMS_t* params) +{ + /* A and WORK contain allocated blocks */ + free(params->A); + free(params->WORK); + memset(params, 0, sizeof(*params)); +} + +/** Compute the squared l2 norm of a contiguous vector */ +static @basetyp@ +@TYPE@_abs2(@typ@ *p, npy_intp n) { + npy_intp i; + @basetyp@ res = 0; + for (i = 0; i < n; i++) { + @typ@ el = p[i]; +#if @cmplx@ + res += el.real*el.real + el.imag*el.imag; +#else + res += el*el; +#endif + } + return res; +} + static void -@TYPE@_qr_complete(char **args, npy_intp const *dimensions, npy_intp const *steps, - void *NPY_UNUSED(func)) +@TYPE@_lstsq(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) { - GQR_PARAMS_t params; + GELSD_PARAMS_t params; int error_occurred = get_fp_invalid_and_clear(); - fortran_int n, m; - size_t safe_n, safe_m; + fortran_int n, m, nrhs; + fortran_int excess; - INIT_OUTER_LOOP_3 + INIT_OUTER_LOOP_7 m = (fortran_int)dimensions[0]; n = (fortran_int)dimensions[1]; - safe_n = n; - safe_m = m; - + nrhs = (fortran_int)dimensions[2]; + excess = m - n; - if (init_@lapack_func@_complete(¶ms, m, n)) { - LINEARIZE_DATA_t a_in, tau_in, q_out; + if (init_@lapack_func@(¶ms, m, n, nrhs)) { + LINEARIZE_DATA_t a_in, b_in, x_out, s_out, r_out; init_linearize_data(&a_in, n, m, steps[1], steps[0]); - init_linearize_data(&tau_in, 1, fortran_int_min(m, n), 1, steps[2]); - init_linearize_data(&q_out, m, m, steps[4], steps[3]); + init_linearize_data_ex(&b_in, nrhs, m, steps[3], steps[2], fortran_int_max(n, m)); + init_linearize_data_ex(&x_out, nrhs, n, steps[5], steps[4], fortran_int_max(n, m)); + init_linearize_data(&r_out, 1, nrhs, 1, steps[6]); + init_linearize_data(&s_out, 1, fortran_int_min(n, m), 1, steps[7]); - BEGIN_OUTER_LOOP_3 + BEGIN_OUTER_LOOP_7 int not_ok; linearize_@TYPE@_matrix(params.A, args[0], &a_in); - memcpy(params.Q, params.A, safe_n * safe_m * sizeof(@ftyp@)); - linearize_@TYPE@_matrix(params.TAU, args[1], &tau_in); + linearize_@TYPE@_matrix(params.B, args[1], &b_in); + params.RCOND = args[2]; not_ok = call_@lapack_func@(¶ms); if (!not_ok) { - delinearize_@TYPE@_matrix(args[1], params.TAU, &tau_in); - delinearize_@TYPE@_matrix(args[2], params.Q, &q_out); + delinearize_@TYPE@_matrix(args[3], params.B, &x_out); + *(npy_int*) args[5] = params.RANK; + delinearize_@REALTYPE@_matrix(args[6], params.S, &s_out); + + /* Note that linalg.lstsq discards this when excess == 0 */ + if (excess >= 0 && params.RANK == n) { + /* Compute the residuals as the square sum of each column */ + int i; + char *resid = args[4]; + @ftyp@ *components = (@ftyp@ *)params.B + n; + for (i = 0; i < nrhs; i++) { + @ftyp@ *vector = components + i*m; + /* Numpy and fortran floating types are the same size, + * so this cast is safe */ + @basetyp@ abs2 = @TYPE@_abs2((@typ@ *)vector, excess); + memcpy( + resid + i*r_out.column_strides, + &abs2, sizeof(abs2)); + } + } + else { + /* Note that this is always discarded by linalg.lstsq */ + nan_@REALTYPE@_matrix(args[4], &r_out); + } } else { error_occurred = 1; - nan_@TYPE@_matrix(args[1], &tau_in); - nan_@TYPE@_matrix(args[2], &q_out); + nan_@TYPE@_matrix(args[3], &x_out); + nan_@REALTYPE@_matrix(args[4], &r_out); + *(npy_int*) args[5] = -1; + nan_@REALTYPE@_matrix(args[6], &s_out); } END_OUTER_LOOP @@ -4157,10 +4156,10 @@ GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(lstsq); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(qr_r_raw); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(qr_reduced); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(qr_complete); +GUFUNC_FUNC_ARRAY_REAL_COMPLEX(lstsq); GUFUNC_FUNC_ARRAY_EIG(eig); GUFUNC_FUNC_ARRAY_EIG(eigvals); @@ -4226,14 +4225,6 @@ static char svd_1_3_types[] = { NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE }; -/* A, b, rcond, x, resid, rank, s, */ -static char lstsq_types[] = { - NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT, - NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE, - NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT, - NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE, -}; - /* A, tau */ static char qr_r_raw_types[] = { NPY_FLOAT, NPY_FLOAT, @@ -4258,6 +4249,14 @@ static char qr_complete_types[] = { NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, }; +/* A, b, rcond, x, resid, rank, s, */ +static char lstsq_types[] = { + NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT, + NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE, + NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT, NPY_FLOAT, NPY_INT, NPY_FLOAT, + NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_INT, NPY_DOUBLE, +}; + typedef struct gufunc_descriptor_struct { char *name; char *signature; @@ -4450,24 +4449,6 @@ GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { eigvals_types }, { - "lstsq_m", - "(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(m)", - "least squares on the last two dimensions and broadcast to the rest. \n"\ - "For m <= n. \n", - 4, 3, 4, - FUNC_ARRAY_NAME(lstsq), - lstsq_types - }, - { - "lstsq_n", - "(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(n)", - "least squares on the last two dimensions and broadcast to the rest. \n"\ - "For m >= n, meaning that residuals are produced. \n", - 4, 3, 4, - FUNC_ARRAY_NAME(lstsq), - lstsq_types - }, - { "qr_r_raw_m", "(m,n)->(m)", "Compute TAU vector for the last two dimensions \n"\ @@ -4520,6 +4501,24 @@ GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { 4, 2, 1, FUNC_ARRAY_NAME(qr_complete), qr_complete_types + }, + { + "lstsq_m", + "(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(m)", + "least squares on the last two dimensions and broadcast to the rest. \n"\ + "For m <= n. \n", + 4, 3, 4, + FUNC_ARRAY_NAME(lstsq), + lstsq_types + }, + { + "lstsq_n", + "(m,n),(m,nrhs),()->(n,nrhs),(nrhs),(),(n)", + "least squares on the last two dimensions and broadcast to the rest. \n"\ + "For m >= n, meaning that residuals are produced. \n", + 4, 3, 4, + FUNC_ARRAY_NAME(lstsq), + lstsq_types } }; |