diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2012-02-04 14:53:01 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2012-02-04 17:54:38 -0700 |
commit | 41a41f1ef4c4f53ee39bd4332d1e9d0acc0b16ca (patch) | |
tree | abfafd7d7c4c790dd909d3e8c0d1de8b09dc1393 /numpy | |
parent | 1099bdc9d410381f9206c798971d5ef178537414 (diff) | |
download | numpy-41a41f1ef4c4f53ee39bd4332d1e9d0acc0b16ca.tar.gz |
UPD: Use explicit prefixed types in einsum.c.src template headers.
The rationale for this is that everything is explicit in the template
header and easily accessed there. I also think it better to keep the
names and types independent, but that is arguable. The file was
otherwise in no need of updating.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/einsum.c.src | 250 |
1 files changed, 127 insertions, 123 deletions
diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 59e98cd6b..1692b633a 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -75,10 +75,14 @@ typedef enum { * ubyte, ushort, uint, ulong, ulonglong, * half, float, double, longdouble, * cfloat, cdouble, clongdouble# - * #temp = byte, short, int, long, longlong, - * ubyte, ushort, uint, ulong, ulonglong, - * float, float, double, longdouble, - * float, double, longdouble# + * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_half, npy_float, npy_double, npy_longdouble, + * npy_cfloat, npy_cdouble, npy_clongdouble# + * #temptype = npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_float, npy_float, npy_double, npy_longdouble, + * npy_float, npy_double, npy_longdouble# * #to = ,,,,, * ,,,,, * npy_float_to_half,,,, @@ -131,44 +135,44 @@ static void while (count--) { #if !@complex@ # if @nop@ == 1 - *(npy_@name@ *)data_out = @to@(@from@(*(npy_@name@ *)data0) + - @from@(*(npy_@name@ *)data_out)); + *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) + + @from@(*(@type@ *)data_out)); data0 += stride0; data_out += stride_out; # elif @nop@ == 2 - *(npy_@name@ *)data_out = @to@(@from@(*(npy_@name@ *)data0) * - @from@(*(npy_@name@ *)data1) + - @from@(*(npy_@name@ *)data_out)); + *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1) + + @from@(*(@type@ *)data_out)); data0 += stride0; data1 += stride1; data_out += stride_out; # elif @nop@ == 3 - *(npy_@name@ *)data_out = @to@(@from@(*(npy_@name@ *)data0) * - @from@(*(npy_@name@ *)data1) * - @from@(*(npy_@name@ *)data2) + - @from@(*(npy_@name@ *)data_out)); + *(@type@ *)data_out = @to@(@from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1) * + @from@(*(@type@ *)data2) + + @from@(*(@type@ *)data_out)); data0 
+= stride0; data1 += stride1; data2 += stride2; data_out += stride_out; # else - npy_@temp@ temp = @from@(*(npy_@name@ *)dataptr[0]); + @temptype@ temp = @from@(*(@type@ *)dataptr[0]); int i; for (i = 1; i < nop; ++i) { - temp *= @from@(*(npy_@name@ *)dataptr[i]); + temp *= @from@(*(@type@ *)dataptr[i]); } - *(npy_@name@ *)dataptr[nop] = @to@(temp + - @from@(*(npy_@name@ *)dataptr[i])); + *(@type@ *)dataptr[nop] = @to@(temp + + @from@(*(@type@ *)dataptr[i])); for (i = 0; i <= nop; ++i) { dataptr[i] += strides[i]; } # endif #else /* complex */ # if @nop@ == 1 - ((npy_@temp@ *)data_out)[0] = ((npy_@temp@ *)data0)[0] + - ((npy_@temp@ *)data_out)[0]; - ((npy_@temp@ *)data_out)[1] = ((npy_@temp@ *)data0)[1] + - ((npy_@temp@ *)data_out)[1]; + ((@temptype@ *)data_out)[0] = ((@temptype@ *)data0)[0] + + ((@temptype@ *)data_out)[0]; + ((@temptype@ *)data_out)[1] = ((@temptype@ *)data0)[1] + + ((@temptype@ *)data_out)[1]; data0 += stride0; data_out += stride_out; # else @@ -177,21 +181,21 @@ static void # else #define _SUMPROD_NOP nop # endif - npy_@temp@ re, im, tmp; + @temptype@ re, im, tmp; int i; - re = ((npy_@temp@ *)dataptr[0])[0]; - im = ((npy_@temp@ *)dataptr[0])[1]; + re = ((@temptype@ *)dataptr[0])[0]; + im = ((@temptype@ *)dataptr[0])[1]; for (i = 1; i < _SUMPROD_NOP; ++i) { - tmp = re * ((npy_@temp@ *)dataptr[i])[0] - - im * ((npy_@temp@ *)dataptr[i])[1]; - im = re * ((npy_@temp@ *)dataptr[i])[1] + - im * ((npy_@temp@ *)dataptr[i])[0]; + tmp = re * ((@temptype@ *)dataptr[i])[0] - + im * ((@temptype@ *)dataptr[i])[1]; + im = re * ((@temptype@ *)dataptr[i])[1] + + im * ((@temptype@ *)dataptr[i])[0]; re = tmp; } - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[0] = re + - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[0]; - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[1] = im + - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[1]; + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re + + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0]; + ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im + + ((@temptype@ 
*)dataptr[_SUMPROD_NOP])[1]; for (i = 0; i <= _SUMPROD_NOP; ++i) { dataptr[i] += strides[i]; @@ -208,8 +212,8 @@ static void @name@_sum_of_products_contig_one(int nop, char **dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@name@ *data0 = (npy_@name@ *)dataptr[0]; - npy_@name@ *data_out = (npy_@name@ *)dataptr[1]; + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data_out = (@type@ *)dataptr[1]; NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_one (%d)\n", (int)count); @@ -225,12 +229,12 @@ finish_after_unrolled_loop: data_out[@i@] = @to@(@from@(data0[@i@]) + @from@(data_out[@i@])); #else - ((npy_@temp@ *)data_out + 2*@i@)[0] = - ((npy_@temp@ *)data0 + 2*@i@)[0] + - ((npy_@temp@ *)data_out + 2*@i@)[0]; - ((npy_@temp@ *)data_out + 2*@i@)[1] = - ((npy_@temp@ *)data0 + 2*@i@)[1] + - ((npy_@temp@ *)data_out + 2*@i@)[1]; + ((@temptype@ *)data_out + 2*@i@)[0] = + ((@temptype@ *)data0 + 2*@i@)[0] + + ((@temptype@ *)data_out + 2*@i@)[0]; + ((@temptype@ *)data_out + 2*@i@)[1] = + ((@temptype@ *)data0 + 2*@i@)[1] + + ((@temptype@ *)data_out + 2*@i@)[1]; #endif /**end repeat2**/ case 0: @@ -248,12 +252,12 @@ finish_after_unrolled_loop: data_out[@i@] = @to@(@from@(data0[@i@]) + @from@(data_out[@i@])); #else /* complex */ - ((npy_@temp@ *)data_out + 2*@i@)[0] = - ((npy_@temp@ *)data0 + 2*@i@)[0] + - ((npy_@temp@ *)data_out + 2*@i@)[0]; - ((npy_@temp@ *)data_out + 2*@i@)[1] = - ((npy_@temp@ *)data0 + 2*@i@)[1] + - ((npy_@temp@ *)data_out + 2*@i@)[1]; + ((@temptype@ *)data_out + 2*@i@)[0] = + ((@temptype@ *)data0 + 2*@i@)[0] + + ((@temptype@ *)data_out + 2*@i@)[0]; + ((@temptype@ *)data_out + 2*@i@)[1] = + ((@temptype@ *)data0 + 2*@i@)[1] + + ((@temptype@ *)data_out + 2*@i@)[1]; #endif /**end repeat2**/ data0 += 8; @@ -270,9 +274,9 @@ static void @name@_sum_of_products_contig_two(int nop, char **dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@name@ *data0 = (npy_@name@ *)dataptr[0]; - npy_@name@ *data1 = (npy_@name@ *)dataptr[1]; - npy_@name@ 
*data_out = (npy_@name@ *)dataptr[2]; + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data1 = (@type@ *)dataptr[1]; + @type@ *data_out = (@type@ *)dataptr[2]; #if EINSUM_USE_SSE1 && @float32@ __m128 a, b; @@ -356,9 +360,9 @@ static void @name@_sum_of_products_stride0_contig_outcontig_two(int nop, char **dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@temp@ value0 = @from@(*(npy_@name@ *)dataptr[0]); - npy_@name@ *data1 = (npy_@name@ *)dataptr[1]; - npy_@name@ *data_out = (npy_@name@ *)dataptr[2]; + @temptype@ value0 = @from@(*(@type@ *)dataptr[0]); + @type@ *data1 = (@type@ *)dataptr[1]; + @type@ *data_out = (@type@ *)dataptr[2]; #if EINSUM_USE_SSE1 && @float32@ __m128 a, b, value0_sse; @@ -485,9 +489,9 @@ static void @name@_sum_of_products_contig_stride0_outcontig_two(int nop, char **dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@name@ *data0 = (npy_@name@ *)dataptr[0]; - npy_@temp@ value1 = @from@(*(npy_@name@ *)dataptr[1]); - npy_@name@ *data_out = (npy_@name@ *)dataptr[2]; + @type@ *data0 = (@type@ *)dataptr[0]; + @temptype@ value1 = @from@(*(@type@ *)dataptr[1]); + @type@ *data_out = (@type@ *)dataptr[2]; #if EINSUM_USE_SSE1 && @float32@ __m128 a, b, value1_sse; @@ -569,9 +573,9 @@ static void @name@_sum_of_products_contig_contig_outstride0_two(int nop, char **dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@name@ *data0 = (npy_@name@ *)dataptr[0]; - npy_@name@ *data1 = (npy_@name@ *)dataptr[1]; - npy_@temp@ accum = 0; + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data1 = (@type@ *)dataptr[1]; + @temptype@ accum = 0; #if EINSUM_USE_SSE1 && @float32@ __m128 a, accum_sse = _mm_setzero_ps(); @@ -592,7 +596,7 @@ finish_after_unrolled_loop: accum += @from@(data0[@i@]) * @from@(data1[@i@]); /**end repeat2**/ case 0: - *(npy_@name@ *)dataptr[2] += @to@(accum); + *(@type@ *)dataptr[2] += @to@(accum); return; } @@ -729,9 +733,9 @@ static void @name@_sum_of_products_stride0_contig_outstride0_two(int nop, char 
**dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@temp@ value0 = @from@(*(npy_@name@ *)dataptr[0]); - npy_@name@ *data1 = (npy_@name@ *)dataptr[1]; - npy_@temp@ accum = 0; + @temptype@ value0 = @from@(*(@type@ *)dataptr[0]); + @type@ *data1 = (@type@ *)dataptr[1]; + @temptype@ accum = 0; #if EINSUM_USE_SSE1 && @float32@ __m128 a, accum_sse = _mm_setzero_ps(); @@ -750,7 +754,7 @@ finish_after_unrolled_loop: accum += @from@(data1[@i@]); /**end repeat2**/ case 0: - *(npy_@name@ *)dataptr[2] += @to@(value0 * accum); + *(@type@ *)dataptr[2] += @to@(value0 * accum); return; } @@ -828,9 +832,9 @@ static void @name@_sum_of_products_contig_stride0_outstride0_two(int nop, char **dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@name@ *data0 = (npy_@name@ *)dataptr[0]; - npy_@temp@ value1 = @from@(*(npy_@name@ *)dataptr[1]); - npy_@temp@ accum = 0; + @type@ *data0 = (@type@ *)dataptr[0]; + @temptype@ value1 = @from@(*(@type@ *)dataptr[1]); + @temptype@ accum = 0; #if EINSUM_USE_SSE1 && @float32@ __m128 a, accum_sse = _mm_setzero_ps(); @@ -849,7 +853,7 @@ finish_after_unrolled_loop: accum += @from@(data0[@i@]); /**end repeat2**/ case 0: - *(npy_@name@ *)dataptr[2] += @to@(accum * value1); + *(@type@ *)dataptr[2] += @to@(accum * value1); return; } @@ -929,10 +933,10 @@ static void @name@_sum_of_products_contig_three(int nop, char **dataptr, npy_intp *NPY_UNUSED(strides), npy_intp count) { - npy_@name@ *data0 = (npy_@name@ *)dataptr[0]; - npy_@name@ *data1 = (npy_@name@ *)dataptr[1]; - npy_@name@ *data2 = (npy_@name@ *)dataptr[2]; - npy_@name@ *data_out = (npy_@name@ *)dataptr[3]; + @type@ *data0 = (@type@ *)dataptr[0]; + @type@ *data1 = (@type@ *)dataptr[1]; + @type@ *data2 = (@type@ *)dataptr[2]; + @type@ *data_out = (@type@ *)dataptr[3]; /* Unroll the loop by 8 */ while (count >= 8) { @@ -978,15 +982,15 @@ static void while (count--) { #if !@complex@ - npy_@temp@ temp = @from@(*(npy_@name@ *)dataptr[0]); + @temptype@ temp = @from@(*(@type@ 
*)dataptr[0]); int i; for (i = 1; i < nop; ++i) { - temp *= @from@(*(npy_@name@ *)dataptr[i]); + temp *= @from@(*(@type@ *)dataptr[i]); } - *(npy_@name@ *)dataptr[nop] = @to@(temp + - @from@(*(npy_@name@ *)dataptr[i])); + *(@type@ *)dataptr[nop] = @to@(temp + + @from@(*(@type@ *)dataptr[i])); for (i = 0; i <= nop; ++i) { - dataptr[i] += sizeof(npy_@name@); + dataptr[i] += sizeof(@type@); } #else /* complex */ # if @nop@ <= 3 @@ -994,24 +998,24 @@ static void # else # define _SUMPROD_NOP nop # endif - npy_@temp@ re, im, tmp; + @temptype@ re, im, tmp; int i; - re = ((npy_@temp@ *)dataptr[0])[0]; - im = ((npy_@temp@ *)dataptr[0])[1]; + re = ((@temptype@ *)dataptr[0])[0]; + im = ((@temptype@ *)dataptr[0])[1]; for (i = 1; i < _SUMPROD_NOP; ++i) { - tmp = re * ((npy_@temp@ *)dataptr[i])[0] - - im * ((npy_@temp@ *)dataptr[i])[1]; - im = re * ((npy_@temp@ *)dataptr[i])[1] + - im * ((npy_@temp@ *)dataptr[i])[0]; + tmp = re * ((@temptype@ *)dataptr[i])[0] - + im * ((@temptype@ *)dataptr[i])[1]; + im = re * ((@temptype@ *)dataptr[i])[1] + + im * ((@temptype@ *)dataptr[i])[0]; re = tmp; } - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[0] = re + - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[0]; - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[1] = im + - ((npy_@temp@ *)dataptr[_SUMPROD_NOP])[1]; + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0] = re + + ((@temptype@ *)dataptr[_SUMPROD_NOP])[0]; + ((@temptype@ *)dataptr[_SUMPROD_NOP])[1] = im + + ((@temptype@ *)dataptr[_SUMPROD_NOP])[1]; for (i = 0; i <= _SUMPROD_NOP; ++i) { - dataptr[i] += sizeof(npy_@name@); + dataptr[i] += sizeof(@type@); } # undef _SUMPROD_NOP #endif @@ -1027,11 +1031,11 @@ static void npy_intp *strides, npy_intp count) { #if @complex@ - npy_@temp@ accum_re = 0, accum_im = 0; - npy_@temp@ *data0 = (npy_@temp@ *)dataptr[0]; + @temptype@ accum_re = 0, accum_im = 0; + @temptype@ *data0 = (@temptype@ *)dataptr[0]; #else - npy_@temp@ accum = 0; - npy_@name@ *data0 = (npy_@name@ *)dataptr[0]; + @temptype@ accum = 0; + @type@ *data0 = (@type@ 
*)dataptr[0]; #endif #if EINSUM_USE_SSE1 && @float32@ @@ -1060,11 +1064,11 @@ finish_after_unrolled_loop: /**end repeat2**/ case 0: #if @complex@ - ((npy_@temp@ *)dataptr[1])[0] += accum_re; - ((npy_@temp@ *)dataptr[1])[1] += accum_im; + ((@temptype@ *)dataptr[1])[0] += accum_re; + ((@temptype@ *)dataptr[1])[1] += accum_im; #else - *((npy_@name@ *)dataptr[1]) = @to@(accum + - @from@(*((npy_@name@ *)dataptr[1]))); + *((@type@ *)dataptr[1]) = @to@(accum + + @from@(*((@type@ *)dataptr[1]))); #endif return; } @@ -1204,9 +1208,9 @@ static void npy_intp *strides, npy_intp count) { #if @complex@ - npy_@temp@ accum_re = 0, accum_im = 0; + @temptype@ accum_re = 0, accum_im = 0; #else - npy_@temp@ accum = 0; + @temptype@ accum = 0; #endif #if (@nop@ == 1) || (@nop@ <= 3 && !@complex@) @@ -1228,25 +1232,25 @@ static void while (count--) { #if !@complex@ # if @nop@ == 1 - accum += @from@(*(npy_@name@ *)data0); + accum += @from@(*(@type@ *)data0); data0 += stride0; # elif @nop@ == 2 - accum += @from@(*(npy_@name@ *)data0) * - @from@(*(npy_@name@ *)data1); + accum += @from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1); data0 += stride0; data1 += stride1; # elif @nop@ == 3 - accum += @from@(*(npy_@name@ *)data0) * - @from@(*(npy_@name@ *)data1) * - @from@(*(npy_@name@ *)data2); + accum += @from@(*(@type@ *)data0) * + @from@(*(@type@ *)data1) * + @from@(*(@type@ *)data2); data0 += stride0; data1 += stride1; data2 += stride2; # else - npy_@temp@ temp = @from@(*(npy_@name@ *)dataptr[0]); + @temptype@ temp = @from@(*(@type@ *)dataptr[0]); int i; for (i = 1; i < nop; ++i) { - temp *= @from@(*(npy_@name@ *)dataptr[i]); + temp *= @from@(*(@type@ *)dataptr[i]); } accum += temp; for (i = 0; i < nop; ++i) { @@ -1255,8 +1259,8 @@ static void # endif #else /* complex */ # if @nop@ == 1 - accum_re += ((npy_@temp@ *)data0)[0]; - accum_im += ((npy_@temp@ *)data0)[1]; + accum_re += ((@temptype@ *)data0)[0]; + accum_im += ((@temptype@ *)data0)[1]; data0 += stride0; # else # if @nop@ <= 3 @@ 
-1264,15 +1268,15 @@ static void # else #define _SUMPROD_NOP nop # endif - npy_@temp@ re, im, tmp; + @temptype@ re, im, tmp; int i; - re = ((npy_@temp@ *)dataptr[0])[0]; - im = ((npy_@temp@ *)dataptr[0])[1]; + re = ((@temptype@ *)dataptr[0])[0]; + im = ((@temptype@ *)dataptr[0])[1]; for (i = 1; i < _SUMPROD_NOP; ++i) { - tmp = re * ((npy_@temp@ *)dataptr[i])[0] - - im * ((npy_@temp@ *)dataptr[i])[1]; - im = re * ((npy_@temp@ *)dataptr[i])[1] + - im * ((npy_@temp@ *)dataptr[i])[0]; + tmp = re * ((@temptype@ *)dataptr[i])[0] - + im * ((@temptype@ *)dataptr[i])[1]; + im = re * ((@temptype@ *)dataptr[i])[1] + + im * ((@temptype@ *)dataptr[i])[0]; re = tmp; } accum_re += re; @@ -1287,19 +1291,19 @@ static void #if @complex@ # if @nop@ <= 3 - ((npy_@temp@ *)dataptr[@nop@])[0] += accum_re; - ((npy_@temp@ *)dataptr[@nop@])[1] += accum_im; + ((@temptype@ *)dataptr[@nop@])[0] += accum_re; + ((@temptype@ *)dataptr[@nop@])[1] += accum_im; # else - ((npy_@temp@ *)dataptr[nop])[0] += accum_re; - ((npy_@temp@ *)dataptr[nop])[1] += accum_im; + ((@temptype@ *)dataptr[nop])[0] += accum_re; + ((@temptype@ *)dataptr[nop])[1] += accum_im; # endif #else # if @nop@ <= 3 - *((npy_@name@ *)dataptr[@nop@]) = @to@(accum + - @from@(*((npy_@name@ *)dataptr[@nop@]))); + *((@type@ *)dataptr[@nop@]) = @to@(accum + + @from@(*((@type@ *)dataptr[@nop@]))); # else - *((npy_@name@ *)dataptr[nop]) = @to@(accum + - @from@(*((npy_@name@ *)dataptr[nop]))); + *((@type@ *)dataptr[nop]) = @to@(accum + + @from@(*((@type@ *)dataptr[nop]))); # endif #endif |