diff options
Diffstat (limited to 'numpy/linalg/umath_linalg.c.src')
-rw-r--r-- | numpy/linalg/umath_linalg.c.src | 1187 |
1 files changed, 26 insertions, 1161 deletions
diff --git a/numpy/linalg/umath_linalg.c.src b/numpy/linalg/umath_linalg.c.src index 3d9157dd9..3c2b316e2 100644 --- a/numpy/linalg/umath_linalg.c.src +++ b/numpy/linalg/umath_linalg.c.src @@ -819,9 +819,17 @@ linearize_@TYPE@_matrix(void *dst_in, (fortran_int)(data->column_strides/sizeof(@typ@)); fortran_int one = 1; for (i=0; i< data->rows; i++) { - FNAME(@copy@)(&columns, - (void*)src, &column_strides, - (void*)dst, &one); + if (column_strides >= 0) { + FNAME(@copy@)(&columns, + (void*)src, &column_strides, + (void*)dst, &one); + } + else { + FNAME(@copy@)(&columns, + (void*)((@typ@*)src + (columns-1)*column_strides), + &column_strides, + (void*)dst, &one); + } src += data->row_strides/sizeof(@typ@); dst += data->columns; } @@ -847,9 +855,17 @@ delinearize_@TYPE@_matrix(void *dst_in, (fortran_int)(data->column_strides/sizeof(@typ@)); fortran_int one = 1; for (i=0; i < data->rows; i++) { - FNAME(@copy@)(&columns, - (void*)src, &one, - (void*)dst, &column_strides); + if (column_strides >= 0) { + FNAME(@copy@)(&columns, + (void*)src, &one, + (void*)dst, &column_strides); + } + else { + FNAME(@copy@)(&columns, + (void*)src, &one, + (void*)((@typ@*)dst + (columns-1)*column_strides), + &column_strides); + } src += data->columns; dst += data->row_strides/sizeof(@typ@); } @@ -877,45 +893,6 @@ nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data) } } -static inline void -make_symmetric_@TYPE@_matrix(char UPLO, - fortran_int n, - void *matrix) -{ - /* - note: optimized assuming that sequential write is better than - sequential read - */ - fortran_int i; - fortran_int one = 1; - - /* note: 'L' and 'U' are interpreted considering *fortran* order */ - if ('L' == UPLO) { - @typ@ *dst = (@typ@ *) matrix; - @typ@ *src = (@typ@ *) matrix; - src += 1; - dst += n; - for (i = 1; i < n; ++i) { - FNAME(@copy@)(&i, - (void *)src, &n, - (void *)dst, &one); - src += 1; - dst += n; - } - } else { /* assume U */ - @typ@ *dst = (@typ@ *) matrix; - @typ@ *src = (@typ@ *) matrix; - src += n; - dst += 1; - for (i = n - 1; i > 0; --i) { - FNAME(@copy@)(&i, - (void *)src, &n, - (void *)dst, &one); - src += n + 1; - dst += n + 1; - } - } -} /**end repeat**/ /* identity square matrix generation */ @@ -947,19 +924,6 @@ identity_@TYPE@_matrix(void *ptr, size_t n) #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# #cblas_type=s,d,c,z# */ -static inline void -tril_@TYPE@_matrix(void *ptr, size_t n) -{ - size_t i,j; - @typ@ *matrix = (@typ@*)ptr; - matrix++; - for (i = n-1; i > 0; --i) { - for (j = 0; j < i; ++j) { - *matrix = @cblas_type@_zero; - } - matrix += n + 1; - } -} static inline void triu_@TYPE@_matrix(void *ptr, size_t n) @@ -976,337 +940,6 @@ triu_@TYPE@_matrix(void *ptr, size_t n) } /**end repeat**/ -/* - ***************************************************************************** - ** UFUNC LOOPS ** - ***************************************************************************** - */ - -/**begin repeat - #typ=npy_float,npy_double,COMPLEX_t,DOUBLECOMPLEX_t,COMPLEX_t,DOUBLECOMPLEX_t# - #ftyp=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex,fortran_complex,fortran_doublecomplex# - #add=FLOAT_add,DOUBLE_add,CFLOAT_add,CDOUBLE_add,CFLOAT_add,CDOUBLE_add# - #mul=FLOAT_mul,DOUBLE_mul,CFLOAT_mul,CDOUBLE_mul,CFLOAT_mulc,CDOUBLE_mulc# - #dot=sdot,ddot,cdotu,zdotu,cdotc,zdotc# - #zero=s_zero, d_zero,c_zero,z_zero,c_zero,z_zero# - */ - -static inline void -@dot@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps) -{ - const size_t sot = sizeof(@typ@); - int dim = (int)dimensions[0]; - INIT_OUTER_LOOP_3 - /* - * use blas if the stride is a multiple of datatype size in the inputs - * it should be the common case - */ - if ((0 == (steps[3] % sot)) && - (0 == (steps[4] % sot))) { - /* use blas */ - - int is1 = (int)(steps[0]/sot), is2 = (int)(steps[1]/sot); - BEGIN_OUTER_LOOP_3 - @ftyp@ * ip1 = (@ftyp@*)args[0], *ip2 = (@ftyp@*)args[1]; - *(@ftyp@*)(args[2]) = BLAS(@dot@)(&dim, ip1, &is1, ip2, &is2); - END_OUTER_LOOP - } else { - /* use standard version */ - npy_intp is1=steps[0], is2=steps[1]; - BEGIN_OUTER_LOOP_3 - int i; - char *ip1=args[0], *ip2=args[1], *op=args[2]; - @typ@ sum = @zero@; - for (i = 0; i < dim; i++) { - @typ@ prod = @mul@(*(@typ@ *)ip1, *(@typ@*)ip2); - sum = @add@(sum, prod); - ip1 += is1; - ip2 += is2; - } - *(@typ@ *)op = sum; - END_OUTER_LOOP - } -} - -/**end repeat**/ - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #dot=sdot,ddot,cdotu,zdotu# - #dotc=sdot,ddot,cdotc,zdotc# - */ -static void -@TYPE@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps, - void *NPY_UNUSED(func)) -{ - @dot@_inner1d(args, dimensions, steps); -} - -static void -@TYPE@_dotc1d(char **args, npy_intp *dimensions, npy_intp *steps, - void *NPY_UNUSED(func)) -{ - @dotc@_inner1d(args, dimensions, steps); -} -/**end repeat**/ - -/* -------------------------------------------------------------------------- */ - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #typ=npy_float,npy_double,COMPLEX_t,DOUBLECOMPLEX_t# - #add=FLOAT_add,DOUBLE_add,CFLOAT_add,CDOUBLE_add# - #mul=FLOAT_mul,DOUBLE_mul,CFLOAT_mul,CDOUBLE_mul# - #zero=s_zero, d_zero,c_zero,z_zero# -*/ - -/* - * This implements the function - * out[n] = sum_i { in1[n, i] * in2[n, i] * in3[n, i] }. - */ - -static void -@TYPE@_innerwt(char **args, - npy_intp *dimensions, - npy_intp *steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_4 - npy_intp di = dimensions[0]; - npy_intp i; - npy_intp is1=steps[0], is2=steps[1], is3=steps[2]; - BEGIN_OUTER_LOOP_4 - char *ip1=args[0], *ip2=args[1], *ip3=args[2], *op=args[3]; - @typ@ sum = @zero@; - for (i = 0; i < di; i++) { - @typ@ prod = @mul@(@mul@(*(@typ@*)ip1, *(@typ@*)ip2),*(@typ@*)ip3); - sum = @add@(sum, prod); - ip1 += is1; - ip2 += is2; - ip3 += is3; - } - *(@typ@ *)op = sum; - END_OUTER_LOOP -} - -/**end repeat**/ - - -/* -------------------------------------------------------------------------- */ - /* Matrix Multiply */ - -typedef struct gemm_params_struct -{ - void *A; - void *B; - void *C; - fortran_int LDA; - fortran_int LDB; - fortran_int LDC; - fortran_int M; - fortran_int K; - fortran_int N; - char TRANSA; - char TRANSB; - - void *allocated_data; -} GEMM_PARAMS_t; - -static inline void -dump_gemm_params(const char* name, const GEMM_PARAMS_t* params) -{ - TRACE_TXT("\n%s\n" - "%14s: %18p\n" "%14s: %18p\n" "%14s: %18p\n" - "%14s: %18d\n" "%14s: %18d\n" "%14s: %18d\n" - "%14s: %18d\n" "%14s: %18d\n" "%14s: %18d\n" - "%14s: %15c'%c'\n" "%14s: %15c'%c'\n", - name, - "A", params->A, "B", params->B, "C", params->C, - "LDA", params->LDA, "LDB", params->LDB, "LDC", params->LDC, - "M", params->M, "K", params->K, "N", params->N, - "TRANSA", ' ', params->TRANSA, "TRANSB", ' ', params->TRANSB); -} - - -typedef struct _matrix_desc { - int need_buff; - fortran_int lead; - size_t size; - void *buff; -} matrix_desc; - -static inline void -matrix_desc_init(matrix_desc *mtxd, - npy_intp* steps, - size_t sot, - fortran_int rows, - fortran_int columns) -/* initialized a matrix desc based on the information in steps - it will fill the matrix desc with the values needed. - steps[0] contains column step - steps[1] contains row step -*/ -{ - /* if column step is not element step, a copy will be needed - to arrange the array in a way compatible with blas - */ - mtxd->need_buff = steps[0] != sot; - - if (!mtxd->need_buff) { - if (steps[1]) { - mtxd->lead = (fortran_int) steps[1] / sot; - } else { - /* step is 0, this means either it is only 1 column or - there is a step trick around*/ - if (columns > 1) { - /* step tricks not supported in gemm... make a copy */ - mtxd->need_buff = 1; - } else { - /* lead must be at least 1 */ - mtxd->lead = rows; - } - } - } - - /* if a copy is performed, the lead is the number of columns */ - if (mtxd->need_buff) { - mtxd->lead = rows; - } - - mtxd->size = rows*columns*sot*mtxd->need_buff; - mtxd->buff = NULL; -} - -static inline npy_uint8 * -matrix_desc_assign_buff(matrix_desc* mtxd, npy_uint8 *p) -{ - if (mtxd->need_buff) { - mtxd->buff = p; - return p + mtxd->size; - } else { - return p; - } -} - -static inline int -init_gemm_params(GEMM_PARAMS_t *params, - fortran_int m, - fortran_int k, - fortran_int n, - npy_intp* steps, - size_t sot) -{ - npy_uint8 *mem_buff = NULL; - matrix_desc a, b, c; - matrix_desc_init(&a, steps + 0, sot, m, k); - matrix_desc_init(&b, steps + 2, sot, k, n); - matrix_desc_init(&c, steps + 4, sot, m, n); - - if (a.size + b.size + c.size) - { - npy_uint8 *current; - /* need to allocate some buffer */ - mem_buff = malloc(a.size + b.size + c.size); - if (!mem_buff) - goto error; - - current = mem_buff; - current = matrix_desc_assign_buff(&a, current); - current = matrix_desc_assign_buff(&b, current); - current = matrix_desc_assign_buff(&c, current); - } - - params->A = a.buff; - params->B = b.buff; - params->C = c.buff; - params->LDA = a.lead; - params->LDB = b.lead; - params->LDC = c.lead; - params->M = m; - params->N = n; - params->K = k; - params->TRANSA='N'; - params->TRANSB='N'; - - params->allocated_data = mem_buff; - - return 1; - error: - free(mem_buff); - memset(params, 0, sizeof(*params)); - return 0; -} - -static inline void -release_gemm_params(GEMM_PARAMS_t * params) -{ - free(params->allocated_data); - params->allocated_data = NULL; -} - - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #typ=npy_float,npy_double,npy_cfloat,npy_cdouble# - #one=s_one,d_one,c_one.array,z_one.array# - #zero=s_zero,d_zero,c_zero.array,z_zero.array# - #GEMM=sgemm,dgemm,cgemm,zgemm# -*/ - -static void -@TYPE@_matrix_multiply(char **args, - npy_intp *dimensions, - npy_intp *steps, - void *NPY_UNUSED(func)) -{ - /* - * everything is setup in a way that makes things work. BLAS gemm can be - * be called without rearranging nor using weird stuff, as matrices are - * in the expected way in memory. - * This is just a loop calling blas. - */ - GEMM_PARAMS_t params; - fortran_int m, k, n; - - INIT_OUTER_LOOP_3 - - m = (fortran_int) dimensions[0]; - k = (fortran_int) dimensions[1]; - n = (fortran_int) dimensions[2]; - - if (init_gemm_params(¶ms, m, k, n, steps, sizeof(@typ@))) - { - LINEARIZE_DATA_t a_in, b_in, c_out; - - if (params.A) - init_linearize_data(&a_in, k, m, steps[1], steps[0]); - if (params.B) - init_linearize_data(&b_in, n, k, steps[3], steps[2]); - if (params.C) - init_linearize_data(&c_out, n, m, steps[5], steps[4]); - - BEGIN_OUTER_LOOP_3 - /* just call the appropriate multiply and update pointers */ - @typ@ *A = linearize_@TYPE@_matrix(params.A, args[0], &a_in); - @typ@ *B = linearize_@TYPE@_matrix(params.B, args[1], &b_in); - @typ@ *C = params.C ? (@typ@*)params.C : (@typ@ *) args[2]; - - /* linearize source operands if needed */ - FNAME(@GEMM@)(¶ms.TRANSA, ¶ms.TRANSB, - ¶ms.M, ¶ms.N, ¶ms.K, - (void*)&@one@, // alpha - (void*)A, ¶ms.LDA, - (void*)B, ¶ms.LDB, - (void*)&@zero@, // beta - (void*)C, ¶ms.LDC); - delinearize_@TYPE@_matrix(args[2], params.C, &c_out); - END_OUTER_LOOP - - release_gemm_params(¶ms); - } -} -/**end repeat**/ - /* -------------------------------------------------------------------------- */ /* Determinants */ @@ -2152,6 +1785,8 @@ static void fortran_int n; INIT_OUTER_LOOP_2 + assert(uplo == 'L'); + n = (fortran_int)dimensions[0]; if (init_@lapack_func@(¶ms, uplo, n)) { @@ -2177,21 +1812,12 @@ static void } static void -@TYPE@_cholesky_up(char **args, npy_intp *dimensions, npy_intp *steps, - void *NPY_UNUSED(func)) -{ - @TYPE@_cholesky('U', args, dimensions, steps); -} - -static void @TYPE@_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { @TYPE@_cholesky('L', args, dimensions, steps); } - - /**end repeat**/ /* -------------------------------------------------------------------------- */ @@ -2590,6 +2216,8 @@ static inline void int error_occurred = get_fp_invalid_and_clear(); GEEV_PARAMS_t geev_params; + assert(JOBVL == 'N'); + STACK_TRACE; op_count += 'V'==JOBVL?1:0; op_count += 'V'==JOBVR?1:0; @@ -3140,581 +2768,6 @@ static void /**end repeat**/ -/* -------------------------------------------------------------------------- */ - /* some basic ufuncs */ - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# - */ - -static void -@TYPE@_add3(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_4 - BEGIN_OUTER_LOOP_4 - @typ@ *op1p = (@typ@ *) args[0]; - @typ@ *op2p = (@typ@ *) args[1]; - @typ@ *op3p = (@typ@ *) args[2]; - @typ@ *op4p = (@typ@ *) args[3]; - @typ@ tmp; - tmp = @TYPE@_add(*op1p, *op2p); - *op4p = @TYPE@_add(tmp, *op3p); - END_OUTER_LOOP -} - -static void -@TYPE@_multiply3(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_4 - BEGIN_OUTER_LOOP_4 - @typ@ *op1p = (@typ@ *) args[0]; - @typ@ *op2p = (@typ@ *) args[1]; - @typ@ *op3p = (@typ@ *) args[2]; - @typ@ *op4p = (@typ@ *) args[3]; - @typ@ tmp; - tmp = @TYPE@_mul(*op1p, *op2p); - *op4p = @TYPE@_mul(tmp, *op3p); - END_OUTER_LOOP -} - -static void -@TYPE@_multiply3_add(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_5 - BEGIN_OUTER_LOOP_5 - @typ@ *op1p = (@typ@ *) args[0]; - @typ@ *op2p = (@typ@ *) args[1]; - @typ@ *op3p = (@typ@ *) args[2]; - @typ@ *op4p = (@typ@ *) args[3]; - @typ@ *op5p = (@typ@ *) args[4]; - @typ@ tmp, tmp2; - - tmp = @TYPE@_mul(*op1p, *op2p); - tmp2 = @TYPE@_mul(tmp, *op3p); - *op5p = @TYPE@_add(tmp2, *op4p); - END_OUTER_LOOP -} - -static void -@TYPE@_multiply_add(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_4 - BEGIN_OUTER_LOOP_4 - @typ@ *op1p = (@typ@ *) args[0]; - @typ@ *op2p = (@typ@ *) args[1]; - @typ@ *op3p = (@typ@ *) args[2]; - @typ@ *op4p = (@typ@ *) args[3]; - @typ@ tmp; - tmp = @TYPE@_mul(*op1p, *op2p); - *op4p = @TYPE@_add(tmp, *op3p); - END_OUTER_LOOP -} - -static void -@TYPE@_multiply_add2(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_5 - BEGIN_OUTER_LOOP_5 - @typ@ *op1p = (@typ@ *) args[0]; - @typ@ *op2p = (@typ@ *) args[1]; - @typ@ *op3p = (@typ@ *) args[2]; - @typ@ *op4p = (@typ@ *) args[3]; - @typ@ *op5p = (@typ@ *) args[4]; - @typ@ tmp, tmp2; - tmp = @TYPE@_mul(*op1p, *op2p); - tmp2 = @TYPE@_add(*op3p, *op4p); - *op5p = @TYPE@_add(tmp, tmp2); - END_OUTER_LOOP -} - -static void -@TYPE@_multiply4(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_5 - BEGIN_OUTER_LOOP_5 - @typ@ *op1p = (@typ@ *) args[0]; - @typ@ *op2p = (@typ@ *) args[1]; - @typ@ *op3p = (@typ@ *) args[2]; - @typ@ *op4p = (@typ@ *) args[3]; - @typ@ *op5p = (@typ@ *) args[4]; - @typ@ tmp, tmp2; - tmp = @TYPE@_mul(*op1p, *op2p); - tmp2 = @TYPE@_mul(*op3p, *op4p); - *op5p = @TYPE@_mul(tmp, tmp2); - END_OUTER_LOOP -} - -static void -@TYPE@_multiply4_add(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_6 - BEGIN_OUTER_LOOP_6 - @typ@ *op1p = (@typ@ *) args[0]; - @typ@ *op2p = (@typ@ *) args[1]; - @typ@ *op3p = (@typ@ *) args[2]; - @typ@ *op4p = (@typ@ *) args[3]; - @typ@ *op5p = (@typ@ *) args[4]; - @typ@ *op6p = (@typ@ *) args[5]; - @typ@ tmp, tmp2, tmp3; - tmp = @TYPE@_mul(*op1p, *op2p); - tmp2 = @TYPE@_mul(*op3p, *op4p); - tmp3 = @TYPE@_mul(tmp, tmp2); - *op6p = @TYPE@_add(tmp3, *op5p); - END_OUTER_LOOP -} - -/**end repeat**/ - -/* -------------------------------------------------------------------------- */ - /* quadratic form */ - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t# - #ftyp=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex# - #zero=s_zero,d_zero,c_zero,z_zero# - #blas_dot=FNAME(sdot),FNAME(ddot),FNAME(cdotu),FNAME(zdotu)# - */ - -static inline @typ@ -@TYPE@_dot_blas(size_t count, - void *X, intptr_t X_byte_step, - void *Y, intptr_t Y_byte_step) -{ - @ftyp@ result; - fortran_int N = (fortran_int) count; - fortran_int INCX = X_byte_step/sizeof(@ftyp@); - fortran_int INCY = Y_byte_step/sizeof(@ftyp@); - - result = @blas_dot@(&N, X, &INCX, Y, &INCY); - - return *(@typ@ *)&result; -} - -static inline @typ@ -@TYPE@_dot_std(size_t count, - void *X, intptr_t X_byte_step, - void *Y, intptr_t Y_byte_step) -{ - size_t i; - @typ@ acc, *x_ptr, *y_ptr; - x_ptr = (@typ@ *)X; - y_ptr = (@typ@ *)Y; - - acc = @TYPE@_mul(*x_ptr, *y_ptr); - x_ptr = offset_ptr(x_ptr, X_byte_step); - y_ptr = offset_ptr(y_ptr, Y_byte_step); - - for (i = 1; i < count; i++) { - @typ@ tmp; - - tmp = @TYPE@_mul(*x_ptr, *y_ptr); - acc = @TYPE@_add(acc, tmp); - - x_ptr = offset_ptr(x_ptr, X_byte_step); - y_ptr = offset_ptr(y_ptr, Y_byte_step); - } - - return acc; -} - -/* uQv, where u and v are vectors and Q is a matrix */ -static void -@TYPE@_quadratic_form(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - INIT_OUTER_LOOP_4 - size_t m = (size_t)dimensions[0]; - size_t n = (size_t)dimensions[1]; - ptrdiff_t u_step = (ptrdiff_t)steps[0]; - ptrdiff_t Q_row_step = (ptrdiff_t)steps[1]; - ptrdiff_t Q_column_step = (ptrdiff_t)steps[2]; - ptrdiff_t v_step = (ptrdiff_t)steps[3]; - - if ((0 == (Q_row_step % sizeof(@ftyp@))) && - (0 == (u_step % sizeof(@ftyp@)))) { - /* use blas loops for dot */ - BEGIN_OUTER_LOOP_4 - size_t column; - npy_uint8 *u, *Q, *v; - @typ@ *r; - @typ@ accum = @zero@; - - u = (npy_uint8 *)args[0]; /* u (m) [in] */ - Q = (npy_uint8 *)args[1]; /* Q (m,n) [in] */ - v = (npy_uint8 *)args[2]; /* v (n) [in] */ - r = (@typ@ *)args[3]; /* result [out] */ - /* sum (compute u * Q[i] * v[i]) for all i. - Q[i] are the different columns on Q */ - - for (column = 0; column < n; ++column) { - @typ@ tmp, tmp2; - tmp = @TYPE@_dot_blas(m, - Q, Q_row_step, - u, u_step); - tmp2 = @TYPE@_mul(tmp, *(@typ@*)v); - accum = @TYPE@_add(accum, tmp2); - Q += Q_column_step; - v += v_step; - } - - *r = accum; - END_OUTER_LOOP - } else { - BEGIN_OUTER_LOOP_4 - size_t column; - npy_uint8 *u, *Q, *v; - @typ@ *r; - @typ@ accum = @zero@; - u = (npy_uint8 *)args[0]; /* u (m) [in] */ - Q = (npy_uint8 *)args[1]; /* Q (m,n) [in] */ - v = (npy_uint8 *)args[2]; /* v (n) [in] */ - r = (@typ@ *)args[3]; /* result [out] */ - /* sum (compute u * Q[i] * v[i]) for all i. - Q[i] are the different columns on Q */ - - for (column = 0; column < n; ++column) { - @typ@ tmp, tmp2; - tmp = @TYPE@_dot_std(m, Q, Q_row_step, u, u_step); - tmp2 = @TYPE@_mul(tmp, *(@typ@*)v); - accum = @TYPE@_add(accum, tmp2); - Q += Q_column_step; - v += v_step; - } - - *r = accum; - END_OUTER_LOOP - } -} -/**end repeat**/ - -/* -------------------------------------------------------------------------- */ - /* chosolve (using potrs) */ - -typedef struct potrs_params_struct { - void *A; - void *B; - fortran_int N; - fortran_int NRHS; - fortran_int LDA; - fortran_int LDB; - char UPLO; -} POTRS_PARAMS_t; - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #ftyp=float,double,COMPLEX_t,DOUBLECOMPLEX_t# - #lapack_func_2=spotrf,dpotrf,cpotrf,zpotrf# - #lapack_func=spotrs,dpotrs,cpotrs,zpotrs# - */ - - -static inline int -init_@lapack_func@(POTRS_PARAMS_t *params, - char UPLO, - fortran_int N, - fortran_int NRHS) -{ - npy_uint8 *mem_buff = NULL; - npy_uint8 *a, *b; - size_t a_size, b_size; - - a_size = N*N*sizeof(@ftyp@); - b_size = N*NRHS*sizeof(@ftyp@); - mem_buff = malloc(a_size + b_size); - if (!mem_buff) - goto error; - - a = mem_buff; - b = a + a_size; - - params->A = (void*)a; - params->B = (void*)b; - params->N = N; - params->NRHS = NRHS; - params->LDA = N; - params->LDB = N; - params->UPLO = UPLO; - - return 1; - - error: - free(mem_buff); - memset(params, 0, sizeof(*params)); - - return 0; -} - -static inline void -release_@lapack_func@(POTRS_PARAMS_t *params) -{ - free(params->A); - memset(params,0,sizeof(*params)); - return; -} - -static inline int -call_@lapack_func@(POTRS_PARAMS_t *params) -{ - fortran_int rv; - LAPACK(@lapack_func_2@)(¶ms->UPLO, - ¶ms->N, - params->A, ¶ms->LDA, - &rv); - if (0 != rv) - return rv; - - LAPACK(@lapack_func@)(¶ms->UPLO, - ¶ms->N, ¶ms->NRHS, - params->A, ¶ms->LDA, - params->B, ¶ms->LDB, - &rv); - return rv; -} - - -/* uplo: either 'U' or 'L' - ndim: use 1 to get nrhs from dimensions (matrix), 0 to use 1 (vector) - */ -static void -@TYPE@_chosolve(char uplo, char ndim, - char **args, - npy_intp *dimensions, - npy_intp* steps - ) -{ - POTRS_PARAMS_t params; - int error_occurred = get_fp_invalid_and_clear(); - fortran_int n, nrhs; - INIT_OUTER_LOOP_3 - - n = (fortran_int)dimensions[0]; - nrhs = ndim?(fortran_int)dimensions[1]:1; - if (init_@lapack_func@(¶ms, uplo, n, nrhs)) { - LINEARIZE_DATA_t a_in, b_in, r_out; - - init_linearize_data(&a_in, n, n, steps[1], steps[0]); - if (ndim) { - init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]); - init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]); - } else { - init_linearize_data(&b_in, 1, n, 0, steps[2]); - init_linearize_data(&r_out, 1, n, 0, steps[3]); - } - - BEGIN_OUTER_LOOP_3 - int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - linearize_@TYPE@_matrix(params.B, args[1], &b_in); - not_ok = call_@lapack_func@(¶ms); - if (!not_ok) { - delinearize_@TYPE@_matrix(args[2], params.B, &r_out); - } else { - error_occurred = 1; - nan_@TYPE@_matrix(args[2], &r_out); - } - END_OUTER_LOOP - - release_@lapack_func@(¶ms); - } - - set_fp_invalid_or_clear(error_occurred); -} - -static void -@TYPE@_chosolve_up(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - @TYPE@_chosolve('U', 1, args, dimensions, steps); -} - -static void -@TYPE@_chosolve_lo(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - @TYPE@_chosolve('L', 1, args, dimensions, steps); -} - -static void -@TYPE@_chosolve1_up(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - @TYPE@_chosolve('U', 0, args, dimensions, steps); -} - -static void -@TYPE@_chosolve1_lo(char **args, - npy_intp *dimensions, - npy_intp* steps, - void *NPY_UNUSED(func)) -{ - @TYPE@_chosolve('L', 0, args, dimensions, steps); -} - -/**end repeat**/ - -/* -------------------------------------------------------------------------- */ - /* poinv */ - -typedef struct potri_params_struct { - void *A; - fortran_int N; - fortran_int LDA; - char UPLO; -} POTRI_PARAMS_t; - - -/**begin repeat - #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE# - #ftyp=float,double,COMPLEX_t,DOUBLECOMPLEX_t# - #potrf=spotrf,dpotrf,cpotrf,zpotrf# - #potri=spotri,dpotri,cpotri,zpotri# - */ -static inline int -init_@potri@(POTRI_PARAMS_t *params, - char UPLO, - fortran_int N) -{ - npy_uint8 *mem_buff = NULL; - npy_uint8 *a; - size_t a_size; - - a_size = N*N*sizeof(@ftyp@); - mem_buff = malloc(a_size); - if (!mem_buff) - goto error; - - a = mem_buff; - - params->A = (void*)a; - params->N = N; - params->LDA = N; - params->UPLO = UPLO; - - return 1; - - error: - free(mem_buff); - memset(params, 0, sizeof(*params)); - - return 0; -} - -static inline void -release_@potri@(POTRI_PARAMS_t *params) -{ - free(params->A); - memset(params,0,sizeof(*params)); - return; -} - -static inline int -call_@potri@(POTRI_PARAMS_t *params) -{ - fortran_int rv; - LAPACK(@potrf@)(¶ms->UPLO, - ¶ms->N, - params->A, ¶ms->LDA, - &rv); - if (0!= rv) { - return rv; - } - - LAPACK(@potri@)(¶ms->UPLO, - ¶ms->N, - params->A, ¶ms->LDA, - &rv); - return rv; -} - - -static inline void -@TYPE@_poinv(char uplo, - char **args, - npy_intp *dimensions, - npy_intp *steps) -{ - POTRI_PARAMS_t params; - int error_occurred = get_fp_invalid_and_clear(); - fortran_int n; - INIT_OUTER_LOOP_2 - - n = (fortran_int)dimensions[0]; - if (init_@potri@(¶ms, uplo, n)) { - LINEARIZE_DATA_t a_in, r_out; - - init_linearize_data(&a_in, n, n, steps[1], steps[0]); - init_linearize_data(&r_out, n, n, steps[3], steps[2]); - - BEGIN_OUTER_LOOP_2 - int not_ok; - linearize_@TYPE@_matrix(params.A, args[0], &a_in); - not_ok = call_@potri@(¶ms); - if (!not_ok) { - make_symmetric_@TYPE@_matrix(params.UPLO, params.N, params.A); - delinearize_@TYPE@_matrix(args[1], params.A, &r_out); - } else { - error_occurred = 1; - nan_@TYPE@_matrix(args[1], &r_out); - } - END_OUTER_LOOP - - release_@potri@(¶ms); - } - - set_fp_invalid_or_clear(error_occurred); -} - -static void -@TYPE@_poinv_up(char **args, - npy_intp *dimensions, - npy_intp *steps, - void *NPY_UNUSED(func)) -{ - @TYPE@_poinv('U', args, dimensions, steps); -} - -static void -@TYPE@_poinv_lo(char **args, - npy_intp *dimensions, - npy_intp *steps, - void *NPY_UNUSED(func)) -{ - @TYPE@_poinv('L', args, dimensions, steps); -} - -/**end repeat**/ - #pragma GCC diagnostic pop /* -------------------------------------------------------------------------- */ @@ -3772,10 +2825,6 @@ static void *array_of_nulls[] = { } -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inner1d); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(dotc1d); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(innerwt); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(matrix_multiply); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo); @@ -3785,27 +2834,12 @@ GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_up); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S); GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A); GUFUNC_FUNC_ARRAY_EIG(eig); GUFUNC_FUNC_ARRAY_EIG(eigvals); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(quadratic_form); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(add3); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply3); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply3_add); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply_add); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply_add2); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply4); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply4_add); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve_up); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve_lo); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve1_up); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve1_lo); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(poinv_up); -GUFUNC_FUNC_ARRAY_REAL_COMPLEX(poinv_lo); static char equal_2_types[] = { NPY_FLOAT, NPY_FLOAT, @@ -3903,43 +2937,6 @@ typedef struct gufunc_descriptor_struct { GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { { - "inner1d", - "(i),(i)->()", - "inner on the last dimension and broadcast on the rest \n"\ - " \"(i),(i)->()\" \n", - 4, 2, 1, - FUNC_ARRAY_NAME(inner1d), - equal_3_types - }, - { - "dotc1d", - "(i),(i)->()", - "inner on the last dimension and broadcast on the rest \n"\ - " \"(i),(i)->()\" \n", - 4, 2, 1, - FUNC_ARRAY_NAME(dotc1d), - equal_3_types - }, - { - "innerwt", - "(i),(i),(i)->()", - "inner on the last dimension using 3 operands and broadcast on the"\ - " rest \n"\ - " \"(i),(i),(i)->()\" \n", - 2, 3, 1, - FUNC_ARRAY_NAME(innerwt), - equal_4_types - }, - { - "matrix_multiply", - "(m,k),(k,n)->(m,n)", - "dot on the last two dimensions and broadcast on the rest \n"\ - " \"(m,k),(k,n)->(m,n)\" \n", - 4, 2, 1, - FUNC_ARRAY_NAME(matrix_multiply), - equal_3_types - }, - { "slogdet", "(m,m)->(),()", "slogdet on the last two dimensions and broadcast on the rest. \n"\ @@ -4041,16 +3038,6 @@ GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { equal_2_types }, { - "cholesky_up", - "(m,m)->(m,m)", - "cholesky decomposition of hermitian positive-definite matrices. \n"\ - "Broadcast to all outer dimensions. \n"\ - " \"(m,m)->(m,m)\" \n", - 4, 1, 1, - FUNC_ARRAY_NAME(cholesky_up), - equal_2_types - }, - { "cholesky_lo", "(m,m)->(m,m)", "cholesky decomposition of hermitian positive-definite matrices. \n"\ @@ -4129,128 +3116,6 @@ GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = { FUNC_ARRAY_NAME(eigvals), eigvals_types }, - { - "quadratic_form", - "(m),(m,n),(n)->()", - "computes the quadratic form uQv in the inner dimensions, broadcast"\ - "to the rest \n"\ - "Results in the result of uQv for each element of the broadcasted"\ - "dimensions. \n"\ - " \"(m),(m,n),(n)->()", - 4,3,1, - FUNC_ARRAY_NAME(quadratic_form), - equal_4_types - }, - { - "add3", - "(),(),()->()", - "3-way element-wise addition. a,b,c -> a+b+c for all elements. \n", - 4,3,1, - FUNC_ARRAY_NAME(add3), - equal_4_types - }, - { - "multiply3", - "(),(),()->()", - "3-way element-wise product. a,b,c -> a*b*c for all elements. \n", - 4,3,1, - FUNC_ARRAY_NAME(multiply3), - equal_4_types - }, - { - "multiply3_add", - "(),(),(),()->()", - "3-way element-wise product plus addition. a,b,c,d -> a*b*c+d"\ - " for all elements. \n", - 4,4,1, - FUNC_ARRAY_NAME(multiply3_add), - equal_5_types - }, - { - "multiply_add", - "(),(),()->()", - "element-wise multiply-add. a,b,c -> a*b+c for all elements. \n", - 4,3,1, - FUNC_ARRAY_NAME(multiply_add), - equal_4_types - }, - { - "multiply_add2", - "(),(),(),()->()", - "element-wise product with 2 additions. a,b,c,d -> a*b+c+d for"\ - " all elements. \n", - 4,4,1, - FUNC_ARRAY_NAME(multiply_add2), - equal_5_types - }, - { - "multiply4", - "(),(),(),()->()", - "4-way element-wise product. a,b,c,d -> a*b*c*d for all elements. \n", - 4,4,1, - FUNC_ARRAY_NAME(multiply4), - equal_5_types - }, - { - "multiply4_add", - "(),(),(),(),()->()", - "4-way element-wise product and addition. a,b,c,d,e -> a*b*c*d+e. \n", - 4,5,1, - FUNC_ARRAY_NAME(multiply4_add), - equal_6_types - }, - { /* solve using cholesky decomposition (lapack potrs) */ - "chosolve_up", - "(m,m),(m,n)->(m,n)", - "solve for symmetric/hermitian matrices using cholesky"\ - " factorization. \n", - 4,2,1, - FUNC_ARRAY_NAME(chosolve_up), - equal_3_types - }, - { /* solve using choleske decomposition (lapack potrs) */ - "chosolve_lo", - "(m,m),(m,n)->(m,n)", - "solve for symmetric/hermitian matrices using cholesky"\ - " factorization. \n", - 4,2,1, - FUNC_ARRAY_NAME(chosolve_lo), - equal_3_types - }, - { /* solve using cholesky decomposition (lapack potrs) */ - "chosolve1_up", - "(m,m),(m)->(m)", - "solve a system using cholesky decomposition. \n", - 4,2,1, - FUNC_ARRAY_NAME(chosolve1_up), - equal_3_types - }, - { /* solve using cholesky decomposition (lapack potrs) */ - "chosolve1_lo", - "(m,m),(m)->(m)", - "solve a system using cholesky decomposition. \n", - 4,2,1, - FUNC_ARRAY_NAME(chosolve1_lo), - equal_3_types - }, - { /* inverse using cholesky decomposition (lapack potri) */ - "poinv_up", - "(m,m)->(m,m)", - "inverse using cholesky decomposition for symmetric/hermitian"\ - " matrices. \n", - 4,1,1, - FUNC_ARRAY_NAME(poinv_up), - equal_2_types - }, - { /* inverse using cholesky decomposition (lapack potri) */ - "poinv_lo", - "(m,m)->(m,m)", - "inverse using cholesky decomposition for symmetric/hermitian"\ - " matrices. \n", - 4,1,1, - FUNC_ARRAY_NAME(poinv_lo), - equal_2_types - }, }; static void |