diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-19 12:31:46 -0800 |
---|---|---|
committer | Mark Wiebe <mwwiebe@gmail.com> | 2011-01-19 12:55:33 -0800 |
commit | a1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e (patch) | |
tree | c7da6bd334c58dec111201999b6951b441b92a55 | |
parent | fe08a916cf275ecd21c1b32b22aa3b8d2ca36b33 (diff) | |
download | numpy-a1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e.tar.gz |
ENH: core: Added numeric casting strided transfer functions, to speed up simple casts
-rw-r--r-- | numpy/core/code_generators/numpy_api.py | 1 | ||||
-rw-r--r-- | numpy/core/include/numpy/ufuncobject.h | 25 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 46 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 184 | ||||
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 278 | ||||
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.h | 11 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 37 |
7 files changed, 496 insertions, 86 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index 1d98a8c4d..52c5d8bfc 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -292,6 +292,7 @@ multiarray_funcs_api = { 'NpyIter_GetWriteFlags': 258, 'NpyIter_DebugPrint': 259, 'NpyIter_IterationNeedsAPI': 260, + # 'PyArray_CastingConverter': 261, 'PyArray_CountNonzero': 262, 'PyArray_PromoteTypes': 263, diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 0fec32183..a575795a4 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -11,13 +11,33 @@ typedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *) typedef struct { PyObject_HEAD + /* + * nin: Number of inputs + * nout: Number of outputs + * nargs: Always nin + nout (Why is it stored?) + */ int nin, nout, nargs; + + /* Identity for reduction, either PyUFunc_One or PyUFunc_Zero */ int identity; + + /* Array of one-dimensional core loops */ PyUFuncGenericFunction *functions; + /* Array of funcdata that gets passed into the functions */ void **data; + /* The number of elements in 'functions' and 'data' */ int ntypes; + + /* Does not appear to be used */ int check_return; - char *name, *types; + + /* The name of the ufunc */ + char *name; + + /* Array of type numbers, of size ('nargs' * 'ntypes') */ + char *types; + + /* Documentation string */ char *doc; void *ptr; PyObject *obj; @@ -68,7 +88,8 @@ typedef struct { #define UFUNC_FPE_UNDERFLOW 4 #define UFUNC_FPE_INVALID 8 -#define UFUNC_ERR_DEFAULT 0 /* Error mode that avoids look-up (no checking) */ +/* Error mode that avoids look-up (no checking) */ +#define UFUNC_ERR_DEFAULT 0 #define UFUNC_OBJ_ISOBJECT 1 #define UFUNC_OBJ_NEEDS_API 2 diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 121277282..41f6b5834 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -334,52 +334,6 @@ object_depth_and_dimension(PyObject *s, int max, npy_intp *dims) return nd + 1; } -static void -_strided_byte_copy(char *dst, npy_intp outstrides, char *src, - npy_intp instrides, npy_intp N, int elsize) -{ - npy_intp i, j; - char *tout = dst; - char *tin = src; - -#define _FAST_MOVE(_type_) \ - for(i=0; i<N; i++) { \ - ((_type_ *)tout)[0] = ((_type_ *)tin)[0]; \ - tin += instrides; \ - tout += outstrides; \ - } \ - return - - switch(elsize) { - case 8: - _FAST_MOVE(Int64); - case 4: - _FAST_MOVE(Int32); - case 1: - _FAST_MOVE(Int8); - case 2: - _FAST_MOVE(Int16); - case 16: - for (i = 0; i < N; i++) { - ((Int64 *)tout)[0] = ((Int64 *)tin)[0]; - ((Int64 *)tout)[1] = ((Int64 *)tin)[1]; - tin += instrides; - tout += outstrides; - } - return; - default: - for(i = 0; i < N; i++) { - for(j=0; j<elsize; j++) { - *tout++ = *tin++; - } - tin = tin + instrides - elsize; - tout = tout + outstrides - elsize; - } - } -#undef _FAST_MOVE - -} - NPY_NO_EXPORT void _unaligned_strided_byte_copy(char *dst, npy_intp outstrides, char *src, npy_intp instrides, npy_intp N, int elsize) diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index 91680dbc9..e04bac783 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -589,27 +589,81 @@ _aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride), } static int -get_cast_transfer_function(int aligned, +get_nbo_cast_numeric_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + int src_type_num, int dst_type_num, + PyArray_StridedTransferFn **out_stransfer, + void **out_transferdata) +{ + /* Emit a warning if complex imaginary is being cast away */ + if (PyTypeNum_ISCOMPLEX(src_type_num) && + !PyTypeNum_ISCOMPLEX(dst_type_num) && + !PyTypeNum_ISBOOL(dst_type_num)) { + PyObject *cls = NULL, *obj = NULL; + int ret; + obj = PyImport_ImportModule("numpy.core"); + if (obj) { + cls = PyObject_GetAttrString(obj, "ComplexWarning"); + Py_DECREF(obj); + } +#if PY_VERSION_HEX >= 0x02050000 + ret = PyErr_WarnEx(cls, + "Casting complex values to real discards " + "the imaginary part", 1); +#else + ret = PyErr_Warn(cls, + "Casting complex values to real discards " + "the imaginary part"); +#endif + Py_XDECREF(cls); + if (ret < 0) { + return NPY_FAIL; + } + } + + *out_stransfer = PyArray_GetStridedNumericCastFn(aligned, + src_stride, dst_stride, + src_type_num, dst_type_num); + *out_transferdata = NULL; + if (*out_stransfer == NULL) { + PyErr_SetString(PyExc_ValueError, + "unexpected error in GetStridedNumericCastFn"); + return NPY_FAIL; + } + + return NPY_SUCCEED; +} + +static int +get_nbo_cast_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, int move_references, PyArray_StridedTransferFn **out_stransfer, void **out_transferdata, - int *out_needs_api) + int *out_needs_api, + int *out_needs_wrap) { _strided_cast_data *data; - void *todata = NULL, *fromdata = NULL; PyArray_VectorUnaryFunc *castfunc; + PyArray_Descr *tmp_dtype; npy_intp shape = 1, src_itemsize = src_dtype->elsize, dst_itemsize = dst_dtype->elsize; - PyArray_Descr *tmp_dtype; - if (src_dtype->type_num == dst_dtype->type_num) { - PyErr_SetString(PyExc_ValueError, - "low level cast function is for unequal type numbers"); - return NPY_FAIL; + if (PyTypeNum_ISNUMBER(src_dtype->type_num) && + PyTypeNum_ISNUMBER(dst_dtype->type_num)) { + *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || + !PyArray_ISNBO(dst_dtype->byteorder); + return get_nbo_cast_numeric_transfer_function(aligned, + src_stride, dst_stride, + src_dtype->type_num, dst_dtype->type_num, + out_stransfer, out_transferdata); } + *out_needs_wrap = !aligned || + !PyArray_ISNBO(src_dtype->byteorder) || + !PyArray_ISNBO(dst_dtype->byteorder); + /* Check the data types whose casting functions use API calls */ switch (src_dtype->type_num) { case NPY_OBJECT: @@ -700,29 +754,74 @@ get_cast_transfer_function(int aligned, return NPY_FAIL; } - /* If it's aligned and all native byte order, we're all done */ - if (aligned && PyArray_ISNBO(src_dtype->byteorder) && - PyArray_ISNBO(dst_dtype->byteorder)) { - /* Choose the contiguous cast if we can */ - if (move_references && src_dtype->type_num == NPY_OBJECT) { - *out_stransfer = _aligned_strided_to_strided_cast_decref_src; + if (move_references && src_dtype->type_num == NPY_OBJECT) { + *out_stransfer = _aligned_strided_to_strided_cast_decref_src; + } + else { + /* + * Use the contig version if the strides are contiguous or + * we're telling the caller to wrap the return, because + * the wrapping uses a contiguous buffer. + */ + if ((src_stride == src_itemsize && dst_stride == dst_itemsize) || + *out_needs_wrap) { + *out_stransfer = _aligned_contig_to_contig_cast; } else { - if (src_stride == src_itemsize && dst_stride == dst_itemsize) { - *out_stransfer = _aligned_contig_to_contig_cast; - } - else { - *out_stransfer = _aligned_strided_to_strided_cast; - } + *out_stransfer = _aligned_strided_to_strided_cast; } - *out_transferdata = data; + } + *out_transferdata = data; + + return NPY_SUCCEED; +} + +static int +get_cast_transfer_function(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedTransferFn **out_stransfer, + void **out_transferdata, + int *out_needs_api) +{ + PyArray_StridedTransferFn *caststransfer; + void *castdata, *todata = NULL, *fromdata = NULL; + int needs_wrap = 0; + npy_intp src_itemsize = src_dtype->elsize, + dst_itemsize = dst_dtype->elsize; + + if (src_dtype->type_num == dst_dtype->type_num) { + PyErr_SetString(PyExc_ValueError, + "low level cast function is for unequal type numbers"); + return NPY_FAIL; + } + + if (get_nbo_cast_transfer_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + &caststransfer, + &castdata, + out_needs_api, + &needs_wrap) != NPY_SUCCEED) { + return NPY_FAIL; + } + + /* + * If all native byte order and doesn't need alignment wrapping, + * return the function + */ + if (!needs_wrap) { + *out_stransfer = caststransfer; + *out_transferdata = castdata; return NPY_SUCCEED; } /* Otherwise, we have to copy and/or swap to aligned temporaries */ else { - PyArray_StridedTransferFn *tobuffer, *frombuffer, *casttransfer; + PyArray_StridedTransferFn *tobuffer, *frombuffer; /* Get the copy/swap operation from src */ @@ -747,7 +846,7 @@ get_cast_transfer_function(int aligned, src_stride, src_itemsize, src_itemsize); } - /* If not complex, a paired swap */ + /* If complex, a paired swap */ else { tobuffer = PyArray_GetStridedCopySwapPairFn(aligned, src_stride, src_itemsize, @@ -782,7 +881,7 @@ get_cast_transfer_function(int aligned, dst_itemsize, dst_stride, dst_itemsize); } - /* If not complex, a paired swap */ + /* If complex, a paired swap */ else { frombuffer = PyArray_GetStridedCopySwapPairFn(aligned, dst_itemsize, dst_stride, @@ -790,30 +889,25 @@ get_cast_transfer_function(int aligned, } if (frombuffer == NULL || tobuffer == NULL) { - PyArray_FreeStridedTransferData(data); + PyArray_FreeStridedTransferData(castdata); PyArray_FreeStridedTransferData(todata); PyArray_FreeStridedTransferData(fromdata); return NPY_FAIL; } - /* If necessary, use the cast function with source decref */ - if (move_references && src_dtype->type_num == NPY_OBJECT) { - *out_stransfer = _aligned_strided_to_strided_cast_decref_src; - } - /* Use the aligned contiguous cast otherwise */ - else { - casttransfer = &_aligned_contig_to_contig_cast; - } + *out_stransfer = caststransfer; /* Wrap it all up in a new transfer function + data */ if (wrap_aligned_contig_transfer_function( src_itemsize, dst_itemsize, tobuffer, todata, frombuffer, fromdata, - casttransfer, data, + caststransfer, castdata, PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT), out_stransfer, out_transferdata) != NPY_SUCCEED) { - PyArray_FreeStridedTransferData(data); + PyArray_FreeStridedTransferData(castdata); + PyArray_FreeStridedTransferData(todata); + PyArray_FreeStridedTransferData(fromdata); return NPY_FAIL; } @@ -2652,6 +2746,26 @@ PyArray_GetDTypeTransferFunction(int aligned, src_type_num = src_dtype->type_num; dst_type_num = dst_dtype->type_num; + /* Common special case - number -> number NBO cast */ + if (PyTypeNum_ISNUMBER(src_type_num) && + PyTypeNum_ISNUMBER(dst_type_num) && + PyArray_ISNBO(src_dtype->byteorder) && + PyArray_ISNBO(dst_dtype->byteorder)) { + if (PyArray_EquivTypenums(src_type_num, dst_type_num)) { + *out_stransfer = PyArray_GetStridedCopyFn(aligned, + src_stride, dst_stride, + src_itemsize); + *out_transferdata = NULL; + return (*out_stransfer == NULL) ? NPY_FAIL : NPY_SUCCEED; + } + else { + return get_nbo_cast_numeric_transfer_function (aligned, + src_stride, dst_stride, + src_type_num, dst_type_num, + out_stransfer, out_transferdata); + } + } + /* * If there are no references and the data types are equivalent, * return a simple copy diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index c910e2992..84219df76 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -11,12 +11,13 @@ #include <numpy/ndarrayobject.h> #include <numpy/ufuncobject.h> #include <numpy/npy_cpu.h> +#include <numpy/halffloat.h> #include "lowlevel_strided_loops.h" /* x86 platform works with unaligned reads and writes */ #if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64)) -# define NPY_USE_UNALIGNED_ACCESS 1 +# define NPY_USE_UNALIGNED_ACCESS 0 //1 #else # define NPY_USE_UNALIGNED_ACCESS 0 #endif @@ -656,6 +657,280 @@ NPY_NO_EXPORT PyArray_StridedTransferFn * /**end repeat**/ +/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/ + +/**begin repeat + * + * #NAME1 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name1 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #rname1 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * float, double, longdouble# + * #is_half1 = 0*11, 1, 0*6# + * #is_float1 = 0*12, 1, 0, 0, 1, 0, 0# + * #is_double1 = 0*13, 1, 0, 0, 1, 0# + * #is_complex1 = 0*15, 1*3# + */ + +/**begin repeat1 + * + * #NAME2 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name2 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + * #rname2 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * float, double, longdouble# + * #is_half2 = 0*11, 1, 0*6# + * #is_float2 = 0*12, 1, 0, 0, 1, 0, 0# + * #is_double2 = 0*13, 1, 0, 0, 1, 0# + * #is_complex2 = 0*15, 1*3# + */ + +/**begin repeat2 + * #prefix = _aligned,,_aligned_contig,_contig# + * #aligned = 1,0,1,0# + * #contig = 0,0,1,1# + */ + +#if !(NPY_USE_UNALIGNED_ACCESS && !@aligned@) + +/* For half types, don't use actual double/float types in conversion */ +#if @is_half1@ || @is_half2@ + +# if @is_float1@ +# define _TYPE1 npy_uint32 +# elif @is_double1@ +# define _TYPE1 npy_uint64 +# else +# define _TYPE1 npy_@rname1@ +# endif + +# if @is_float2@ +# define _TYPE2 npy_uint32 +# elif @is_double2@ +# define _TYPE2 npy_uint64 +# else +# define _TYPE2 npy_@rname2@ +# endif + +#else + +#define _TYPE1 npy_@rname1@ +#define _TYPE2 npy_@rname2@ + +#endif + +/* Determine an appropriate casting conversion function */ +#if @is_half1@ + +# if @is_float2@ +# define _CONVERT_FN(x) npy_halfbits_to_floatbits(x) +# elif @is_double2@ +# define _CONVERT_FN(x) npy_halfbits_to_doublebits(x) +# elif @is_half2@ +# define _CONVERT_FN(x) (x) +# else +# define _CONVERT_FN(x) ((_TYPE2)npy_half_to_float(x)) +# endif + +#elif @is_half2@ + +# if @is_float1@ +# define _CONVERT_FN(x) npy_floatbits_to_halfbits(x) +# elif @is_double1@ +# define _CONVERT_FN(x) npy_doublebits_to_halfbits(x) +# else +# define _CONVERT_FN(x) npy_float_to_half((float)x) +# endif + +#else + +# define _CONVERT_FN(x) ((_TYPE2)x) + +#endif + +static void +@prefix@_cast_@name1@_to_@name2@( + char *dst, npy_intp dst_stride, + char *src, npy_intp src_stride, + npy_intp N, npy_intp NPY_UNUSED(src_itemsize), + void *NPY_UNUSED(data)) +{ +#if @is_complex1@ + _TYPE1 src_value[2]; +#elif !@aligned@ + _TYPE1 src_value; +#endif +#if @is_complex2@ + _TYPE2 dst_value[2]; +#elif !@aligned@ + _TYPE2 dst_value; +#endif + + /*printf("@prefix@_cast_@name1@_to_@name2@\n");*/ + + while (N--) { +#if @aligned@ +# if @is_complex1@ + src_value[0] = ((_TYPE1 *)src)[0]; + src_value[1] = ((_TYPE1 *)src)[1]; +# elif !@aligned@ + src_value = *((_TYPE1 *)src); +# endif +#else + memcpy(&src_value, src, sizeof(src_value)); +#endif + +/* Do the cast */ +#if @is_complex1@ +# if @is_complex2@ + dst_value[0] = _CONVERT_FN(src_value[0]); + dst_value[1] = _CONVERT_FN(src_value[1]); +# elif !@aligned@ + dst_value = _CONVERT_FN(src_value[0]); +# else + *(_TYPE2 *)dst = _CONVERT_FN(src_value[0]); +# endif +#else +# if @is_complex2@ +# if !@aligned@ + dst_value[0] = _CONVERT_FN(src_value); +# else + dst_value[0] = _CONVERT_FN(*(_TYPE1 *)src); +# endif + dst_value[1] = 0; +# elif !@aligned@ + dst_value = _CONVERT_FN(src_value); +# else + *(_TYPE2 *)dst = _CONVERT_FN(*(_TYPE1 *)src); +# endif +#endif + +#if @aligned@ +# if @is_complex2@ + ((_TYPE2 *)dst)[0] = dst_value[0]; + ((_TYPE2 *)dst)[1] = dst_value[1]; +# elif !@aligned@ + *((_TYPE2 *)dst) = dst_value; +# endif +#else + memcpy(dst, &dst_value, sizeof(dst_value)); +#endif + +#if @contig@ + dst += sizeof(npy_@name2@); + src += sizeof(npy_@name1@); +#else + dst += dst_stride; + src += src_stride; +#endif + } +} + +#undef _CONVERT_FN +#undef _TYPE2 +#undef _TYPE1 + +#endif + +/**end repeat2**/ + +/**end repeat1**/ + +/**end repeat**/ + +NPY_NO_EXPORT PyArray_StridedTransferFn * +PyArray_GetStridedNumericCastFn(npy_intp aligned, npy_intp src_stride, + npy_intp dst_stride, + int src_type_num, int dst_type_num) +{ + switch (src_type_num) { +/**begin repeat + * + * #NAME1 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name1 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + */ + + case NPY_@NAME1@: + //printf("test fn %d - second %d\n", NPY_@NAME1@, dst_type_num); + switch (dst_type_num) { +/**begin repeat1 + * + * #NAME2 = BOOL, + * UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, + * CFLOAT, CDOUBLE, CLONGDOUBLE# + * #name2 = bool, + * ubyte, ushort, uint, ulong, ulonglong, + * byte, short, int, long, longlong, + * half, float, double, longdouble, + * cfloat, cdouble, clongdouble# + */ + + case NPY_@NAME2@: + //printf("ret fn %d %d\n", NPY_@NAME1@, NPY_@NAME2@); +# if NPY_USE_UNALIGNED_ACCESS + if (src_stride == sizeof(npy_@name1@) && + dst_stride == sizeof(npy_@name2@)) { + return &_aligned_contig_cast_@name1@_to_@name2@; + } + else { + return &_aligned_cast_@name1@_to_@name2@; + } +# else + if (src_stride == sizeof(npy_@name1@) && + dst_stride == sizeof(npy_@name2@)) { + return aligned ? + &_aligned_contig_cast_@name1@_to_@name2@ : + &_contig_cast_@name1@_to_@name2@; + } + else { + return aligned ? &_aligned_cast_@name1@_to_@name2@ : + &_cast_@name1@_to_@name2@; + } +# endif + +/**end repeat1**/ + } + //printf("switched test fn %d - second %d\n", NPY_@NAME1@, dst_type_num); + +/**end repeat**/ + } + + return NULL; +} + + +/************** STRIDED TRANSFER FUNCTION MEMORY MANAGEMENT **************/ + typedef void (*_npy_stridedtransfer_dealloc)(void *); NPY_NO_EXPORT void PyArray_FreeStridedTransferData(void *transferdata) @@ -680,6 +955,7 @@ PyArray_CopyStridedTransferData(void *transferdata) return NULL; } +/****************** PRIMITIVE FLAT TO/FROM NDIM FUNCTIONS ******************/ NPY_NO_EXPORT npy_intp PyArray_TransferNDimToStrided(npy_intp ndim, diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.h b/numpy/core/src/multiarray/lowlevel_strided_loops.h index e3427e3c6..34cdf3c39 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.h +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.h @@ -109,6 +109,17 @@ PyArray_GetStridedZeroPadCopyFn(int aligned, void **outtransferdata); /* + * For casts between built-in numeric types, + * this produces a function pointer for casting from src_type_num + * to dst_type_num. If a conversion is unsupported, returns NULL + * without setting a Python exception. + */ +NPY_NO_EXPORT PyArray_StridedTransferFn * +PyArray_GetStridedNumericCastFn(npy_intp aligned, npy_intp src_stride, + npy_intp dst_stride, + int src_type_num, int dst_type_num); + +/* * If it's possible, gives back a transfer function which casts and/or * byte swaps data with the dtype 'src_dtype' into data with the dtype * 'dst_dtype'. If the outtransferdata is populated with a non-NULL value, diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index d2d57d125..6a25e3a4c 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -3509,6 +3509,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds) /* This contains the all the inputs and outputs */ PyArrayObject *op[NPY_MAXARGS]; PyArray_Descr *dtype[NPY_MAXARGS]; + PyArray_Descr *result_type; /* TODO: For 1.6, the default should probably be NPY_CORDER */ NPY_ORDER order = NPY_KEEPORDER; @@ -3535,6 +3536,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds) op[i] = NULL; dtype[i] = NULL; } + result_type = NULL; /* Get input arguments */ for(i = 0; i < nin; ++i) { @@ -3555,6 +3557,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds) op[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, context); /* Start with a native byte-order data type */ + /* if (PyArray_ISNBO(PyArray_DESCR(op[i])->byteorder)) { dtype[i] = PyArray_DESCR(op[i]); Py_INCREF(dtype[i]); @@ -3563,6 +3566,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds) dtype[i] = PyArray_DescrNewByteorder(PyArray_DESCR(op[i]), NPY_NATIVE); } + */ } /* Get positional output arguments */ @@ -3670,13 +3674,42 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds) } } } - - return NULL; + + /* + * Determine the result type. A better approach would be for + * the ufunc to provide a function which gives back the result + * type and inner loop function. The default would work as follows. + */ + result_type = PyArray_ResultType(nin, op, 0, NULL); + if (result_type == NULL) { + goto fail; + } + /* Take into account the types of any output parameters */ + for (i = nin; i < nargs; ++i) { + PyArray_Descr *tmp; + if (op[i] != NULL) { + tmp = PyArray_PromoteTypes(result_type, PyArray_DESCR(op[i])); + if (tmp == NULL) { + goto fail; + } + Py_DECREF(result_type); + result_type = tmp; + } + } + + /* Find the function loop */ + + printf("result type: "); + PyObject_Print((PyObject *)result_type, stdout, 0); + printf("\n"); + + PyErr_SetString(PyExc_RuntimeError, "implementation is not finished!"); fail: for (i = 0; i < niter; ++i) { Py_XDECREF(op[i]); Py_XDECREF(dtype[i]); } + Py_XDECREF(result_type); return NULL; } |