summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2011-01-19 12:31:46 -0800
committerMark Wiebe <mwwiebe@gmail.com>2011-01-19 12:55:33 -0800
commita1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e (patch)
treec7da6bd334c58dec111201999b6951b441b92a55
parentfe08a916cf275ecd21c1b32b22aa3b8d2ca36b33 (diff)
downloadnumpy-a1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e.tar.gz
ENH: core: Added numeric casting strided transfer functions, to speed up simple casts
-rw-r--r--numpy/core/code_generators/numpy_api.py1
-rw-r--r--numpy/core/include/numpy/ufuncobject.h25
-rw-r--r--numpy/core/src/multiarray/ctors.c46
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c184
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.c.src278
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.h11
-rw-r--r--numpy/core/src/umath/ufunc_object.c37
7 files changed, 496 insertions, 86 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 1d98a8c4d..52c5d8bfc 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -292,6 +292,7 @@ multiarray_funcs_api = {
'NpyIter_GetWriteFlags': 258,
'NpyIter_DebugPrint': 259,
'NpyIter_IterationNeedsAPI': 260,
+ #
'PyArray_CastingConverter': 261,
'PyArray_CountNonzero': 262,
'PyArray_PromoteTypes': 263,
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 0fec32183..a575795a4 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -11,13 +11,33 @@ typedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *)
typedef struct {
PyObject_HEAD
+ /*
+ * nin: Number of inputs
+ * nout: Number of outputs
+ * nargs: Always nin + nout (Why is it stored?)
+ */
int nin, nout, nargs;
+
+ /* Identity for reduction, either PyUFunc_One or PyUFunc_Zero */
int identity;
+
+ /* Array of one-dimensional core loops */
PyUFuncGenericFunction *functions;
+ /* Array of funcdata that gets passed into the functions */
void **data;
+ /* The number of elements in 'functions' and 'data' */
int ntypes;
+
+ /* Does not appear to be used */
int check_return;
- char *name, *types;
+
+ /* The name of the ufunc */
+ char *name;
+
+ /* Array of type numbers, of size ('nargs' * 'ntypes') */
+ char *types;
+
+ /* Documentation string */
char *doc;
void *ptr;
PyObject *obj;
@@ -68,7 +88,8 @@ typedef struct {
#define UFUNC_FPE_UNDERFLOW 4
#define UFUNC_FPE_INVALID 8
-#define UFUNC_ERR_DEFAULT 0 /* Error mode that avoids look-up (no checking) */
+/* Error mode that avoids look-up (no checking) */
+#define UFUNC_ERR_DEFAULT 0
#define UFUNC_OBJ_ISOBJECT 1
#define UFUNC_OBJ_NEEDS_API 2
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 121277282..41f6b5834 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -334,52 +334,6 @@ object_depth_and_dimension(PyObject *s, int max, npy_intp *dims)
return nd + 1;
}
-static void
-_strided_byte_copy(char *dst, npy_intp outstrides, char *src,
- npy_intp instrides, npy_intp N, int elsize)
-{
- npy_intp i, j;
- char *tout = dst;
- char *tin = src;
-
-#define _FAST_MOVE(_type_) \
- for(i=0; i<N; i++) { \
- ((_type_ *)tout)[0] = ((_type_ *)tin)[0]; \
- tin += instrides; \
- tout += outstrides; \
- } \
- return
-
- switch(elsize) {
- case 8:
- _FAST_MOVE(Int64);
- case 4:
- _FAST_MOVE(Int32);
- case 1:
- _FAST_MOVE(Int8);
- case 2:
- _FAST_MOVE(Int16);
- case 16:
- for (i = 0; i < N; i++) {
- ((Int64 *)tout)[0] = ((Int64 *)tin)[0];
- ((Int64 *)tout)[1] = ((Int64 *)tin)[1];
- tin += instrides;
- tout += outstrides;
- }
- return;
- default:
- for(i = 0; i < N; i++) {
- for(j=0; j<elsize; j++) {
- *tout++ = *tin++;
- }
- tin = tin + instrides - elsize;
- tout = tout + outstrides - elsize;
- }
- }
-#undef _FAST_MOVE
-
-}
-
NPY_NO_EXPORT void
_unaligned_strided_byte_copy(char *dst, npy_intp outstrides, char *src,
npy_intp instrides, npy_intp N, int elsize)
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 91680dbc9..e04bac783 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -589,27 +589,81 @@ _aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride),
}
static int
-get_cast_transfer_function(int aligned,
+get_nbo_cast_numeric_transfer_function(int aligned,
+                            npy_intp src_stride, npy_intp dst_stride,
+                            int src_type_num, int dst_type_num,
+                            PyArray_StridedTransferFn **out_stransfer,
+                            void **out_transferdata)
+{
+    /*
+     * Picks the specialized strided cast loop for a pair of numeric
+     * type numbers. Only the type numbers are examined here, so any
+     * byte-order handling is left to the caller.
+     *
+     * On success, *out_stransfer receives the loop and *out_transferdata
+     * is set to NULL (these loops carry no auxiliary data).
+     *
+     * Returns NPY_SUCCEED, or NPY_FAIL with a Python exception set.
+     */
+
+    /* Emit a warning if complex imaginary is being cast away */
+    if (PyTypeNum_ISCOMPLEX(src_type_num) &&
+                    !PyTypeNum_ISCOMPLEX(dst_type_num) &&
+                    !PyTypeNum_ISBOOL(dst_type_num)) {
+        PyObject *mod, *cls;
+        int ret;
+
+        /*
+         * Look up the warning category. If either step fails, a Python
+         * exception is already pending, so propagate it rather than
+         * issuing a warning with a NULL category on top of it.
+         */
+        mod = PyImport_ImportModule("numpy.core");
+        if (mod == NULL) {
+            return NPY_FAIL;
+        }
+        cls = PyObject_GetAttrString(mod, "ComplexWarning");
+        Py_DECREF(mod);
+        if (cls == NULL) {
+            return NPY_FAIL;
+        }
+
+#if PY_VERSION_HEX >= 0x02050000
+        ret = PyErr_WarnEx(cls,
+                           "Casting complex values to real discards "
+                           "the imaginary part", 1);
+#else
+        ret = PyErr_Warn(cls,
+                         "Casting complex values to real discards "
+                         "the imaginary part");
+#endif
+        Py_DECREF(cls);
+        /* A negative result means the warning was turned into an error */
+        if (ret < 0) {
+            return NPY_FAIL;
+        }
+    }
+
+    *out_stransfer = PyArray_GetStridedNumericCastFn(aligned,
+                                src_stride, dst_stride,
+                                src_type_num, dst_type_num);
+    *out_transferdata = NULL;
+    /* The lookup returns NULL without raising, so raise here */
+    if (*out_stransfer == NULL) {
+        PyErr_SetString(PyExc_ValueError,
+                "unexpected error in GetStridedNumericCastFn");
+        return NPY_FAIL;
+    }
+
+    return NPY_SUCCEED;
+}
+
+static int
+get_nbo_cast_transfer_function(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references,
PyArray_StridedTransferFn **out_stransfer,
void **out_transferdata,
- int *out_needs_api)
+ int *out_needs_api,
+ int *out_needs_wrap)
{
_strided_cast_data *data;
- void *todata = NULL, *fromdata = NULL;
PyArray_VectorUnaryFunc *castfunc;
+ PyArray_Descr *tmp_dtype;
npy_intp shape = 1, src_itemsize = src_dtype->elsize,
dst_itemsize = dst_dtype->elsize;
- PyArray_Descr *tmp_dtype;
- if (src_dtype->type_num == dst_dtype->type_num) {
- PyErr_SetString(PyExc_ValueError,
- "low level cast function is for unequal type numbers");
- return NPY_FAIL;
+ if (PyTypeNum_ISNUMBER(src_dtype->type_num) &&
+ PyTypeNum_ISNUMBER(dst_dtype->type_num)) {
+ *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) ||
+ !PyArray_ISNBO(dst_dtype->byteorder);
+ return get_nbo_cast_numeric_transfer_function(aligned,
+ src_stride, dst_stride,
+ src_dtype->type_num, dst_dtype->type_num,
+ out_stransfer, out_transferdata);
}
+ *out_needs_wrap = !aligned ||
+ !PyArray_ISNBO(src_dtype->byteorder) ||
+ !PyArray_ISNBO(dst_dtype->byteorder);
+
/* Check the data types whose casting functions use API calls */
switch (src_dtype->type_num) {
case NPY_OBJECT:
@@ -700,29 +754,74 @@ get_cast_transfer_function(int aligned,
return NPY_FAIL;
}
-
/* If it's aligned and all native byte order, we're all done */
- if (aligned && PyArray_ISNBO(src_dtype->byteorder) &&
- PyArray_ISNBO(dst_dtype->byteorder)) {
- /* Choose the contiguous cast if we can */
- if (move_references && src_dtype->type_num == NPY_OBJECT) {
- *out_stransfer = _aligned_strided_to_strided_cast_decref_src;
+ if (move_references && src_dtype->type_num == NPY_OBJECT) {
+ *out_stransfer = _aligned_strided_to_strided_cast_decref_src;
+ }
+ else {
+ /*
+ * Use the contig version if the strides are contiguous or
+ * we're telling the caller to wrap the return, because
+ * the wrapping uses a contiguous buffer.
+ */
+ if ((src_stride == src_itemsize && dst_stride == dst_itemsize) ||
+ *out_needs_wrap) {
+ *out_stransfer = _aligned_contig_to_contig_cast;
}
else {
- if (src_stride == src_itemsize && dst_stride == dst_itemsize) {
- *out_stransfer = _aligned_contig_to_contig_cast;
- }
- else {
- *out_stransfer = _aligned_strided_to_strided_cast;
- }
+ *out_stransfer = _aligned_strided_to_strided_cast;
}
- *out_transferdata = data;
+ }
+ *out_transferdata = data;
+
+ return NPY_SUCCEED;
+}
+
+static int
+get_cast_transfer_function(int aligned,
+ npy_intp src_stride, npy_intp dst_stride,
+ PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
+ int move_references,
+ PyArray_StridedTransferFn **out_stransfer,
+ void **out_transferdata,
+ int *out_needs_api)
+{
+ PyArray_StridedTransferFn *caststransfer;
+ void *castdata, *todata = NULL, *fromdata = NULL;
+ int needs_wrap = 0;
+ npy_intp src_itemsize = src_dtype->elsize,
+ dst_itemsize = dst_dtype->elsize;
+
+ if (src_dtype->type_num == dst_dtype->type_num) {
+ PyErr_SetString(PyExc_ValueError,
+ "low level cast function is for unequal type numbers");
+ return NPY_FAIL;
+ }
+
+ if (get_nbo_cast_transfer_function(aligned,
+ src_stride, dst_stride,
+ src_dtype, dst_dtype,
+ move_references,
+ &caststransfer,
+ &castdata,
+ out_needs_api,
+ &needs_wrap) != NPY_SUCCEED) {
+ return NPY_FAIL;
+ }
+
+ /*
+ * If all native byte order and doesn't need alignment wrapping,
+ * return the function
+ */
+ if (!needs_wrap) {
+ *out_stransfer = caststransfer;
+ *out_transferdata = castdata;
return NPY_SUCCEED;
}
/* Otherwise, we have to copy and/or swap to aligned temporaries */
else {
- PyArray_StridedTransferFn *tobuffer, *frombuffer, *casttransfer;
+ PyArray_StridedTransferFn *tobuffer, *frombuffer;
/* Get the copy/swap operation from src */
@@ -747,7 +846,7 @@ get_cast_transfer_function(int aligned,
src_stride, src_itemsize,
src_itemsize);
}
- /* If not complex, a paired swap */
+ /* If complex, a paired swap */
else {
tobuffer = PyArray_GetStridedCopySwapPairFn(aligned,
src_stride, src_itemsize,
@@ -782,7 +881,7 @@ get_cast_transfer_function(int aligned,
dst_itemsize, dst_stride,
dst_itemsize);
}
- /* If not complex, a paired swap */
+ /* If complex, a paired swap */
else {
frombuffer = PyArray_GetStridedCopySwapPairFn(aligned,
dst_itemsize, dst_stride,
@@ -790,30 +889,25 @@ get_cast_transfer_function(int aligned,
}
if (frombuffer == NULL || tobuffer == NULL) {
- PyArray_FreeStridedTransferData(data);
+ PyArray_FreeStridedTransferData(castdata);
PyArray_FreeStridedTransferData(todata);
PyArray_FreeStridedTransferData(fromdata);
return NPY_FAIL;
}
- /* If necessary, use the cast function with source decref */
- if (move_references && src_dtype->type_num == NPY_OBJECT) {
- *out_stransfer = _aligned_strided_to_strided_cast_decref_src;
- }
- /* Use the aligned contiguous cast otherwise */
- else {
- casttransfer = &_aligned_contig_to_contig_cast;
- }
+ *out_stransfer = caststransfer;
/* Wrap it all up in a new transfer function + data */
if (wrap_aligned_contig_transfer_function(
src_itemsize, dst_itemsize,
tobuffer, todata,
frombuffer, fromdata,
- casttransfer, data,
+ caststransfer, castdata,
PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT),
out_stransfer, out_transferdata) != NPY_SUCCEED) {
- PyArray_FreeStridedTransferData(data);
+ PyArray_FreeStridedTransferData(castdata);
+ PyArray_FreeStridedTransferData(todata);
+ PyArray_FreeStridedTransferData(fromdata);
return NPY_FAIL;
}
@@ -2652,6 +2746,26 @@ PyArray_GetDTypeTransferFunction(int aligned,
src_type_num = src_dtype->type_num;
dst_type_num = dst_dtype->type_num;
+ /* Common special case - number -> number NBO cast */
+ if (PyTypeNum_ISNUMBER(src_type_num) &&
+ PyTypeNum_ISNUMBER(dst_type_num) &&
+ PyArray_ISNBO(src_dtype->byteorder) &&
+ PyArray_ISNBO(dst_dtype->byteorder)) {
+ if (PyArray_EquivTypenums(src_type_num, dst_type_num)) {
+ *out_stransfer = PyArray_GetStridedCopyFn(aligned,
+ src_stride, dst_stride,
+ src_itemsize);
+ *out_transferdata = NULL;
+ return (*out_stransfer == NULL) ? NPY_FAIL : NPY_SUCCEED;
+ }
+ else {
+ return get_nbo_cast_numeric_transfer_function (aligned,
+ src_stride, dst_stride,
+ src_type_num, dst_type_num,
+ out_stransfer, out_transferdata);
+ }
+ }
+
/*
* If there are no references and the data types are equivalent,
* return a simple copy
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index c910e2992..84219df76 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -11,12 +11,13 @@
#include <numpy/ndarrayobject.h>
#include <numpy/ufuncobject.h>
#include <numpy/npy_cpu.h>
+#include <numpy/halffloat.h>
#include "lowlevel_strided_loops.h"
/* x86 platform works with unaligned reads and writes */
#if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64))
-# define NPY_USE_UNALIGNED_ACCESS 1
+# define NPY_USE_UNALIGNED_ACCESS 0 //1
#else
# define NPY_USE_UNALIGNED_ACCESS 0
#endif
@@ -656,6 +657,280 @@ NPY_NO_EXPORT PyArray_StridedTransferFn *
/**end repeat**/
+/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/
+
+/*
+ * Template for the specialized numeric cast loops. The three nested
+ * repeats generate one function per (source type, destination type,
+ * aligned/contiguous variant) combination.
+ *
+ * rname is the real-valued element type (for complex types, the type of
+ * one component). The is_* lists flag, per type, whether it is half,
+ * float-based, double-based, complex, or (destination only) bool.
+ */
+/**begin repeat
+ *
+ * #NAME1 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name1 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ * #rname1 = bool,
+ *           ubyte, ushort, uint, ulong, ulonglong,
+ *           byte, short, int, long, longlong,
+ *           half, float, double, longdouble,
+ *           float, double, longdouble#
+ * #is_half1 = 0*11, 1, 0*6#
+ * #is_float1 = 0*12, 1, 0, 0, 1, 0, 0#
+ * #is_double1 = 0*13, 1, 0, 0, 1, 0#
+ * #is_complex1 = 0*15, 1*3#
+ */
+
+/**begin repeat1
+ *
+ * #NAME2 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name2 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ * #rname2 = bool,
+ *           ubyte, ushort, uint, ulong, ulonglong,
+ *           byte, short, int, long, longlong,
+ *           half, float, double, longdouble,
+ *           float, double, longdouble#
+ * #is_bool2 = 1, 0*17#
+ * #is_half2 = 0*11, 1, 0*6#
+ * #is_float2 = 0*12, 1, 0, 0, 1, 0, 0#
+ * #is_double2 = 0*13, 1, 0, 0, 1, 0#
+ * #is_complex2 = 0*15, 1*3#
+ */
+
+/**begin repeat2
+ * #prefix = _aligned,,_aligned_contig,_contig#
+ * #aligned = 1,0,1,0#
+ * #contig = 0,0,1,1#
+ */
+
+/* The unaligned variants are not generated when unaligned access is used */
+#if !(NPY_USE_UNALIGNED_ACCESS && !@aligned@)
+
+/* For half types, don't use actual double/float types in conversion */
+#if @is_half1@ || @is_half2@
+
+#  if @is_float1@
+#    define _TYPE1 npy_uint32
+#  elif @is_double1@
+#    define _TYPE1 npy_uint64
+#  else
+#    define _TYPE1 npy_@rname1@
+#  endif
+
+#  if @is_float2@
+#    define _TYPE2 npy_uint32
+#  elif @is_double2@
+#    define _TYPE2 npy_uint64
+#  else
+#    define _TYPE2 npy_@rname2@
+#  endif
+
+#else
+
+#  define _TYPE1 npy_@rname1@
+#  define _TYPE2 npy_@rname2@
+
+#endif
+
+/*
+ * Determine an appropriate casting conversion function.
+ *
+ * A destination of bool tests the value against zero instead of using a
+ * plain C cast: npy_bool is a narrow integer type, so a plain cast would
+ * turn values such as 256 or 0.5 into 0.
+ */
+#if @is_half1@
+
+#  if @is_float2@
+#    define _CONVERT_FN(x) npy_halfbits_to_floatbits(x)
+#  elif @is_double2@
+#    define _CONVERT_FN(x) npy_halfbits_to_doublebits(x)
+#  elif @is_half2@
+#    define _CONVERT_FN(x) (x)
+#  elif @is_bool2@
+#    define _CONVERT_FN(x) ((npy_bool)(npy_half_to_float(x) != 0))
+#  else
+#    define _CONVERT_FN(x) ((_TYPE2)npy_half_to_float(x))
+#  endif
+
+#elif @is_half2@
+
+#  if @is_float1@
+#    define _CONVERT_FN(x) npy_floatbits_to_halfbits(x)
+#  elif @is_double1@
+#    define _CONVERT_FN(x) npy_doublebits_to_halfbits(x)
+#  else
+#    define _CONVERT_FN(x) npy_float_to_half((float)(x))
+#  endif
+
+#elif @is_bool2@
+
+#  define _CONVERT_FN(x) ((npy_bool)((x) != 0))
+
+#else
+
+#  define _CONVERT_FN(x) ((_TYPE2)(x))
+
+#endif
+
+/*
+ * Casts N elements from src to dst. The contiguous variants advance by
+ * the element sizes and ignore the stride arguments; the others advance
+ * by src_stride and dst_stride. The unaligned variants move every element
+ * through a local temporary with memcpy instead of dereferencing the
+ * src and dst pointers directly.
+ */
+static void
+@prefix@_cast_@name1@_to_@name2@(
+                        char *dst, npy_intp dst_stride,
+                        char *src, npy_intp src_stride,
+                        npy_intp N, npy_intp NPY_UNUSED(src_itemsize),
+                        void *NPY_UNUSED(data))
+{
+    /* Temporaries are only needed for complex values or unaligned data */
+#if @is_complex1@
+    _TYPE1 src_value[2];
+#elif !@aligned@
+    _TYPE1 src_value;
+#endif
+#if @is_complex2@
+    _TYPE2 dst_value[2];
+#elif !@aligned@
+    _TYPE2 dst_value;
+#endif
+
+    while (N--) {
+/* Load the source element */
+#if !@aligned@
+        memcpy(&src_value, src, sizeof(src_value));
+#elif @is_complex1@
+        src_value[0] = ((_TYPE1 *)src)[0];
+        src_value[1] = ((_TYPE1 *)src)[1];
+#endif
+
+/* Do the cast */
+#if @is_complex1@
+#  if @is_complex2@
+        dst_value[0] = _CONVERT_FN(src_value[0]);
+        dst_value[1] = _CONVERT_FN(src_value[1]);
+#  elif @is_bool2@
+        /* A complex value is true if either component is nonzero */
+#    if !@aligned@
+        dst_value = _CONVERT_FN(src_value[0]) ||
+                    _CONVERT_FN(src_value[1]);
+#    else
+        *(_TYPE2 *)dst = _CONVERT_FN(src_value[0]) ||
+                         _CONVERT_FN(src_value[1]);
+#    endif
+#  elif !@aligned@
+        /* complex -> real keeps only the real component */
+        dst_value = _CONVERT_FN(src_value[0]);
+#  else
+        *(_TYPE2 *)dst = _CONVERT_FN(src_value[0]);
+#  endif
+#else
+#  if @is_complex2@
+        /* real -> complex: the imaginary component is zero */
+#    if !@aligned@
+        dst_value[0] = _CONVERT_FN(src_value);
+#    else
+        dst_value[0] = _CONVERT_FN(*(_TYPE1 *)src);
+#    endif
+        dst_value[1] = 0;
+#  elif !@aligned@
+        dst_value = _CONVERT_FN(src_value);
+#  else
+        *(_TYPE2 *)dst = _CONVERT_FN(*(_TYPE1 *)src);
+#  endif
+#endif
+
+/* Store the result (aligned non-complex results were written above) */
+#if !@aligned@
+        memcpy(dst, &dst_value, sizeof(dst_value));
+#elif @is_complex2@
+        ((_TYPE2 *)dst)[0] = dst_value[0];
+        ((_TYPE2 *)dst)[1] = dst_value[1];
+#endif
+
+#if @contig@
+        dst += sizeof(npy_@name2@);
+        src += sizeof(npy_@name1@);
+#else
+        dst += dst_stride;
+        src += src_stride;
+#endif
+    }
+}
+
+#undef _CONVERT_FN
+#undef _TYPE2
+#undef _TYPE1
+
+#endif
+
+/**end repeat2**/
+
+/**end repeat1**/
+
+/**end repeat**/
+
+/*
+ * Looks up the specialized cast loop from src_type_num to dst_type_num.
+ *
+ * The contiguous variant is chosen when both strides equal the size of
+ * the respective element type. When NPY_USE_UNALIGNED_ACCESS is nonzero
+ * the aligned variants are always returned and 'aligned' is ignored;
+ * otherwise 'aligned' selects between the aligned and unaligned variants.
+ *
+ * Returns NULL, without setting a Python exception, when either type
+ * number is not one of the numeric types enumerated below.
+ */
+NPY_NO_EXPORT PyArray_StridedTransferFn *
+PyArray_GetStridedNumericCastFn(npy_intp aligned, npy_intp src_stride,
+                             npy_intp dst_stride,
+                             int src_type_num, int dst_type_num)
+{
+    switch (src_type_num) {
+/**begin repeat
+ *
+ * #NAME1 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name1 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ */
+
+        case NPY_@NAME1@:
+            switch (dst_type_num) {
+/**begin repeat1
+ *
+ * #NAME2 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name2 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ */
+
+                case NPY_@NAME2@:
+#  if NPY_USE_UNALIGNED_ACCESS
+                    if (src_stride == sizeof(npy_@name1@) &&
+                                dst_stride == sizeof(npy_@name2@)) {
+                        return &_aligned_contig_cast_@name1@_to_@name2@;
+                    }
+                    else {
+                        return &_aligned_cast_@name1@_to_@name2@;
+                    }
+#  else
+                    if (src_stride == sizeof(npy_@name1@) &&
+                                dst_stride == sizeof(npy_@name2@)) {
+                        return aligned ?
+                                    &_aligned_contig_cast_@name1@_to_@name2@ :
+                                    &_contig_cast_@name1@_to_@name2@;
+                    }
+                    else {
+                        return aligned ? &_aligned_cast_@name1@_to_@name2@ :
+                                         &_cast_@name1@_to_@name2@;
+                    }
+#  endif
+
+/**end repeat1**/
+            }
+            /*
+             * dst_type_num matched none of the numeric types; leave the
+             * outer switch instead of falling through every later case.
+             */
+            break;
+
+/**end repeat**/
+    }
+
+    return NULL;
+}
+
+
+/************** STRIDED TRANSFER FUNCTION MEMORY MANAGEMENT **************/
+
typedef void (*_npy_stridedtransfer_dealloc)(void *);
NPY_NO_EXPORT void
PyArray_FreeStridedTransferData(void *transferdata)
@@ -680,6 +955,7 @@ PyArray_CopyStridedTransferData(void *transferdata)
return NULL;
}
+/****************** PRIMITIVE FLAT TO/FROM NDIM FUNCTIONS ******************/
NPY_NO_EXPORT npy_intp
PyArray_TransferNDimToStrided(npy_intp ndim,
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.h b/numpy/core/src/multiarray/lowlevel_strided_loops.h
index e3427e3c6..34cdf3c39 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.h
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.h
@@ -109,6 +109,17 @@ PyArray_GetStridedZeroPadCopyFn(int aligned,
void **outtransferdata);
/*
+ * For casts between built-in numeric types,
+ * this produces a function pointer for casting from src_type_num
+ * to dst_type_num. If a conversion is unsupported, returns NULL
+ * without setting a Python exception.
+ */
+NPY_NO_EXPORT PyArray_StridedTransferFn *
+PyArray_GetStridedNumericCastFn(npy_intp aligned, npy_intp src_stride,
+ npy_intp dst_stride,
+ int src_type_num, int dst_type_num);
+
+/*
* If it's possible, gives back a transfer function which casts and/or
* byte swaps data with the dtype 'src_dtype' into data with the dtype
* 'dst_dtype'. If the outtransferdata is populated with a non-NULL value,
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index d2d57d125..6a25e3a4c 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -3509,6 +3509,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
/* This contains the all the inputs and outputs */
PyArrayObject *op[NPY_MAXARGS];
PyArray_Descr *dtype[NPY_MAXARGS];
+ PyArray_Descr *result_type;
/* TODO: For 1.6, the default should probably be NPY_CORDER */
NPY_ORDER order = NPY_KEEPORDER;
@@ -3535,6 +3536,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
op[i] = NULL;
dtype[i] = NULL;
}
+ result_type = NULL;
/* Get input arguments */
for(i = 0; i < nin; ++i) {
@@ -3555,6 +3557,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
op[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, context);
/* Start with a native byte-order data type */
+ /*
if (PyArray_ISNBO(PyArray_DESCR(op[i])->byteorder)) {
dtype[i] = PyArray_DESCR(op[i]);
Py_INCREF(dtype[i]);
@@ -3563,6 +3566,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
dtype[i] = PyArray_DescrNewByteorder(PyArray_DESCR(op[i]),
NPY_NATIVE);
}
+ */
}
/* Get positional output arguments */
@@ -3670,13 +3674,42 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
}
}
}
-
- return NULL;
+
+ /*
+ * Determine the result type. A better approach would be for
+ * the ufunc to provide a function which gives back the result
+ * type and inner loop function. The default would work as follows.
+ */
+ result_type = PyArray_ResultType(nin, op, 0, NULL);
+ if (result_type == NULL) {
+ goto fail;
+ }
+ /* Take into account the types of any output parameters */
+ for (i = nin; i < nargs; ++i) {
+ PyArray_Descr *tmp;
+ if (op[i] != NULL) {
+ tmp = PyArray_PromoteTypes(result_type, PyArray_DESCR(op[i]));
+ if (tmp == NULL) {
+ goto fail;
+ }
+ Py_DECREF(result_type);
+ result_type = tmp;
+ }
+ }
+
+ /* Find the function loop */
+
+ printf("result type: ");
+ PyObject_Print((PyObject *)result_type, stdout, 0);
+ printf("\n");
+
+ PyErr_SetString(PyExc_RuntimeError, "implementation is not finished!");
fail:
for (i = 0; i < niter; ++i) {
Py_XDECREF(op[i]);
Py_XDECREF(dtype[i]);
}
+ Py_XDECREF(result_type);
return NULL;
}