ENH: core: Added numeric casting strided transfer functions, to speed up simple casts

author: Mark Wiebe <mwwiebe@gmail.com> 2011-01-19 12:31:46 -0800
committer: Mark Wiebe <mwwiebe@gmail.com> 2011-01-19 12:55:33 -0800
commit: a1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e (patch)
tree: c7da6bd334c58dec111201999b6951b441b92a55
parent: fe08a916cf275ecd21c1b32b22aa3b8d2ca36b33 (diff)
download: numpy-a1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e.tar.gz
7 files changed, 496 insertions, 86 deletions
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index 1d98a8c4d..52c5d8bfc 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -292,6 +292,7 @@ multiarray_funcs_api = {
     'NpyIter_GetWriteFlags':                258,
     'NpyIter_DebugPrint':                   259,
     'NpyIter_IterationNeedsAPI':            260,
+    #
     'PyArray_CastingConverter':             261,
     'PyArray_CountNonzero':                 262,
     'PyArray_PromoteTypes':                 263,
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 0fec32183..a575795a4 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -11,13 +11,33 @@ typedef void (*PyUFuncGenericFunction) (char **, npy_intp *, npy_intp *, void *)
 
 typedef struct {
         PyObject_HEAD
+        /*
+         * nin: Number of inputs
+         * nout: Number of outputs
+         * nargs: Always nin + nout (Why is it stored?)
+         */
         int nin, nout, nargs;
+
+        /* Identity for reduction, either PyUFunc_One or PyUFunc_Zero */
         int identity;
+
+        /* Array of one-dimensional core loops */
         PyUFuncGenericFunction *functions;
+        /* Array of funcdata that gets passed into the functions */
         void **data;
+        /* The number of elements in 'functions' and 'data' */
         int ntypes;
+
+        /* Does not appear to be used */
         int check_return;
-        char *name, *types;
+
+        /* The name of the ufunc */
+        char *name;
+
+        /* Array of type numbers, of size ('nargs' * 'ntypes') */
+        char *types;
+
+        /* Documentation string */
         char *doc;
         void *ptr;
         PyObject *obj;
@@ -68,7 +88,8 @@ typedef struct {
 #define UFUNC_FPE_UNDERFLOW     4
 #define UFUNC_FPE_INVALID       8
 
-#define UFUNC_ERR_DEFAULT  0      /* Error mode that avoids look-up (no checking) */
+/* Error mode that avoids look-up (no checking) */
+#define UFUNC_ERR_DEFAULT       0
 
 #define UFUNC_OBJ_ISOBJECT      1
 #define UFUNC_OBJ_NEEDS_API     2
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 121277282..41f6b5834 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -334,52 +334,6 @@ object_depth_and_dimension(PyObject *s, int max, npy_intp *dims)
     return nd + 1;
 }
 
-static void
-_strided_byte_copy(char *dst, npy_intp outstrides, char *src,
-                    npy_intp instrides, npy_intp N, int elsize)
-{
-    npy_intp i, j;
-    char *tout = dst;
-    char *tin = src;
-
-#define _FAST_MOVE(_type_)                              \
-    for(i=0; i<N; i++) {                               \
-        ((_type_ *)tout)[0] = ((_type_ *)tin)[0];       \
-        tin += instrides;                               \
-        tout += outstrides;                             \
-    }                                                   \
-    return
-
-    switch(elsize) {
-    case 8:
-        _FAST_MOVE(Int64);
-    case 4:
-        _FAST_MOVE(Int32);
-    case 1:
-        _FAST_MOVE(Int8);
-    case 2:
-        _FAST_MOVE(Int16);
-    case 16:
-        for (i = 0; i < N; i++) {
-            ((Int64 *)tout)[0] = ((Int64 *)tin)[0];
-            ((Int64 *)tout)[1] = ((Int64 *)tin)[1];
-            tin += instrides;
-            tout += outstrides;
-        }
-        return;
-    default:
-        for(i = 0; i < N; i++) {
-            for(j=0; j<elsize; j++) {
-                *tout++ = *tin++;
-            }
-            tin = tin + instrides - elsize;
-            tout = tout + outstrides - elsize;
-        }
-    }
-#undef _FAST_MOVE
-
-}
-
 NPY_NO_EXPORT void
 _unaligned_strided_byte_copy(char *dst, npy_intp outstrides, char *src,
                              npy_intp instrides, npy_intp N, int elsize)
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 91680dbc9..e04bac783 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -589,27 +589,81 @@ _aligned_contig_to_contig_cast(char *dst, npy_intp NPY_UNUSED(dst_stride),
 }
 
 static int
-get_cast_transfer_function(int aligned,
+get_nbo_cast_numeric_transfer_function(int aligned,
+                            npy_intp src_stride, npy_intp dst_stride,
+                            int src_type_num, int dst_type_num,
+                            PyArray_StridedTransferFn **out_stransfer,
+                            void **out_transferdata)
+{
+    /* Warn when a complex -> real (non-bool) cast drops the imaginary part */
+    int drops_imag = PyTypeNum_ISCOMPLEX(src_type_num) &&
+        !(PyTypeNum_ISCOMPLEX(dst_type_num) || PyTypeNum_ISBOOL(dst_type_num));
+
+    if (drops_imag) {
+        PyObject *warn_cls = NULL;
+        PyObject *core = PyImport_ImportModule("numpy.core");
+        int status;
+        if (core != NULL) {
+            warn_cls = PyObject_GetAttrString(core, "ComplexWarning");
+            Py_DECREF(core);
+        }
+#if PY_VERSION_HEX >= 0x02050000
+        status = PyErr_WarnEx(warn_cls,
+                           "Casting complex values to real discards "
+                           "the imaginary part", 1);
+#else
+        status = PyErr_Warn(warn_cls,
+                         "Casting complex values to real discards "
+                         "the imaginary part");
+#endif
+        Py_XDECREF(warn_cls);
+        if (status < 0) {
+            return NPY_FAIL;
+        }
+    }
+
+    *out_transferdata = NULL;
+    *out_stransfer = PyArray_GetStridedNumericCastFn(aligned,
+                                src_stride, dst_stride,
+                                src_type_num, dst_type_num);
+    if (*out_stransfer != NULL) {
+        return NPY_SUCCEED;
+    }
+    PyErr_SetString(PyExc_ValueError,
+            "unexpected error in GetStridedNumericCastFn");
+    return NPY_FAIL;
+}
+
+static int
+get_nbo_cast_transfer_function(int aligned,
                             npy_intp src_stride, npy_intp dst_stride,
                             PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
                             int move_references,
                             PyArray_StridedTransferFn **out_stransfer,
                             void **out_transferdata,
-                            int *out_needs_api)
+                            int *out_needs_api,
+                            int *out_needs_wrap)
 {
     _strided_cast_data *data;
-    void *todata = NULL, *fromdata = NULL;
     PyArray_VectorUnaryFunc *castfunc;
+    PyArray_Descr *tmp_dtype;
     npy_intp shape = 1, src_itemsize = src_dtype->elsize,
             dst_itemsize = dst_dtype->elsize;
-    PyArray_Descr *tmp_dtype;
 
-    if (src_dtype->type_num == dst_dtype->type_num) {
-        PyErr_SetString(PyExc_ValueError,
-                "low level cast function is for unequal type numbers");
-        return NPY_FAIL;
+    if (PyTypeNum_ISNUMBER(src_dtype->type_num) &&
+                    PyTypeNum_ISNUMBER(dst_dtype->type_num)) {
+        *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) ||
+                          !PyArray_ISNBO(dst_dtype->byteorder);
+        return get_nbo_cast_numeric_transfer_function(aligned,
+                                    src_stride, dst_stride,
+                                    src_dtype->type_num, dst_dtype->type_num,
+                                    out_stransfer, out_transferdata);
     }
 
+    *out_needs_wrap = !aligned ||
+                      !PyArray_ISNBO(src_dtype->byteorder) ||
+                      !PyArray_ISNBO(dst_dtype->byteorder);
+
     /* Check the data types whose casting functions use API calls */
     switch (src_dtype->type_num) {
         case NPY_OBJECT:
@@ -700,29 +754,74 @@ get_cast_transfer_function(int aligned,
         return NPY_FAIL;
     }
 
-
     /* If it's aligned and all native byte order, we're all done */
-    if (aligned && PyArray_ISNBO(src_dtype->byteorder) &&
-                   PyArray_ISNBO(dst_dtype->byteorder)) {
-        /* Choose the contiguous cast if we can */
-        if (move_references && src_dtype->type_num == NPY_OBJECT) {
-            *out_stransfer = _aligned_strided_to_strided_cast_decref_src;
+    if (move_references && src_dtype->type_num == NPY_OBJECT) {
+        *out_stransfer = _aligned_strided_to_strided_cast_decref_src;
+    }
+    else {
+        /*
+         * Use the contig version if the strides are contiguous or
+         * we're telling the caller to wrap the return, because
+         * the wrapping uses a contiguous buffer.
+         */
+        if ((src_stride == src_itemsize && dst_stride == dst_itemsize) ||
+                        *out_needs_wrap) {
+            *out_stransfer = _aligned_contig_to_contig_cast;
         }
         else {
-            if (src_stride == src_itemsize && dst_stride == dst_itemsize) {
-                *out_stransfer = _aligned_contig_to_contig_cast;
-            }
-            else {
-                *out_stransfer = _aligned_strided_to_strided_cast;
-            }
+            *out_stransfer = _aligned_strided_to_strided_cast;
         }
-        *out_transferdata = data;
+    }
+    *out_transferdata = data;
+
+    return NPY_SUCCEED;
+}
+
+static int
+get_cast_transfer_function(int aligned,
+                            npy_intp src_stride, npy_intp dst_stride,
+                            PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
+                            int move_references,
+                            PyArray_StridedTransferFn **out_stransfer,
+                            void **out_transferdata,
+                            int *out_needs_api)
+{
+    PyArray_StridedTransferFn *caststransfer;
+    void *castdata, *todata = NULL, *fromdata = NULL;
+    int needs_wrap = 0;
+    npy_intp src_itemsize = src_dtype->elsize,
+            dst_itemsize = dst_dtype->elsize;
+
+    if (src_dtype->type_num == dst_dtype->type_num) {
+        PyErr_SetString(PyExc_ValueError,
+                "low level cast function is for unequal type numbers");
+        return NPY_FAIL;
+    }
+
+    if (get_nbo_cast_transfer_function(aligned,
+                            src_stride, dst_stride,
+                            src_dtype, dst_dtype,
+                            move_references,
+                            &caststransfer,
+                            &castdata,
+                            out_needs_api,
+                            &needs_wrap) != NPY_SUCCEED) {
+        return NPY_FAIL;
+    }
+
+    /*
+     * If all native byte order and doesn't need alignment wrapping,
+     * return the function
+     */
+    if (!needs_wrap) {
+        *out_stransfer = caststransfer;
+        *out_transferdata = castdata;
 
         return NPY_SUCCEED;
     }
     /* Otherwise, we have to copy and/or swap to aligned temporaries */
     else {
-        PyArray_StridedTransferFn *tobuffer, *frombuffer, *casttransfer;
+        PyArray_StridedTransferFn *tobuffer, *frombuffer;
 
         /* Get the copy/swap operation from src */
 
@@ -747,7 +846,7 @@ get_cast_transfer_function(int aligned,
                                         src_stride, src_itemsize,
                                         src_itemsize);
         }
-        /* If not complex, a paired swap */
+        /* If complex, a paired swap */
         else {
             tobuffer = PyArray_GetStridedCopySwapPairFn(aligned,
                                         src_stride, src_itemsize,
@@ -782,7 +881,7 @@ get_cast_transfer_function(int aligned,
                                         dst_itemsize, dst_stride,
                                         dst_itemsize);
         }
-        /* If not complex, a paired swap */
+        /* If complex, a paired swap */
         else {
             frombuffer = PyArray_GetStridedCopySwapPairFn(aligned,
                                         dst_itemsize, dst_stride,
@@ -790,30 +889,25 @@ get_cast_transfer_function(int aligned,
         }
 
         if (frombuffer == NULL || tobuffer == NULL) {
-            PyArray_FreeStridedTransferData(data);
+            PyArray_FreeStridedTransferData(castdata);
             PyArray_FreeStridedTransferData(todata);
             PyArray_FreeStridedTransferData(fromdata);
             return NPY_FAIL;
         }
 
-        /* If necessary, use the cast function with source decref */
-        if (move_references && src_dtype->type_num == NPY_OBJECT) {
-            *out_stransfer = _aligned_strided_to_strided_cast_decref_src;
-        }
-        /* Use the aligned contiguous cast otherwise */
-        else {
-            casttransfer = &_aligned_contig_to_contig_cast;
-        }
+        *out_stransfer = caststransfer;
 
         /* Wrap it all up in a new transfer function + data */
         if (wrap_aligned_contig_transfer_function(
                             src_itemsize, dst_itemsize,
                             tobuffer, todata,
                             frombuffer, fromdata,
-                            casttransfer, data,
+                            caststransfer, castdata,
                             PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT),
                             out_stransfer, out_transferdata) != NPY_SUCCEED) {
-            PyArray_FreeStridedTransferData(data);
+            PyArray_FreeStridedTransferData(castdata);
+            PyArray_FreeStridedTransferData(todata);
+            PyArray_FreeStridedTransferData(fromdata);
             return NPY_FAIL;
         }
 
@@ -2652,6 +2746,26 @@ PyArray_GetDTypeTransferFunction(int aligned,
     src_type_num = src_dtype->type_num;
     dst_type_num = dst_dtype->type_num;
 
+    /* Common special case - number -> number NBO cast */
+    if (PyTypeNum_ISNUMBER(src_type_num) &&
+                    PyTypeNum_ISNUMBER(dst_type_num) &&
+                    PyArray_ISNBO(src_dtype->byteorder) &&
+                    PyArray_ISNBO(dst_dtype->byteorder)) {
+        if (PyArray_EquivTypenums(src_type_num, dst_type_num)) {
+            *out_stransfer = PyArray_GetStridedCopyFn(aligned,
+                                        src_stride, dst_stride,
+                                        src_itemsize);
+            *out_transferdata = NULL;
+            return (*out_stransfer == NULL) ? NPY_FAIL : NPY_SUCCEED;
+        }
+        else {
+            return get_nbo_cast_numeric_transfer_function (aligned,
+                                        src_stride, dst_stride,
+                                        src_type_num, dst_type_num,
+                                        out_stransfer, out_transferdata);
+        }
+    }
+
     /*
      * If there are no references and the data types are equivalent,
      * return a simple copy
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index c910e2992..84219df76 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -11,12 +11,13 @@
 #include <numpy/ndarrayobject.h>
 #include <numpy/ufuncobject.h>
 #include <numpy/npy_cpu.h>
+#include <numpy/halffloat.h>
 
 #include "lowlevel_strided_loops.h"
 
 /* x86 platform works with unaligned reads and writes */
 #if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64))
-#  define NPY_USE_UNALIGNED_ACCESS 1
+#  define NPY_USE_UNALIGNED_ACCESS 0 //1
 #else
 #  define NPY_USE_UNALIGNED_ACCESS 0
 #endif
@@ -656,6 +657,280 @@ NPY_NO_EXPORT PyArray_StridedTransferFn *
 
 /**end repeat**/
 
+/************* STRIDED CASTING SPECIALIZED FUNCTIONS *************/
+
+/**begin repeat
+ *
+ * #NAME1 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name1 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ * #rname1 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          float, double, longdouble#
+ * #is_half1 = 0*11, 1, 0*6#
+ * #is_float1 = 0*12, 1, 0, 0, 1, 0, 0#
+ * #is_double1 = 0*13, 1, 0, 0, 1, 0#
+ * #is_complex1 = 0*15, 1*3#
+ */
+
+/**begin repeat1
+ *
+ * #NAME2 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name2 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ * #rname2 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          float, double, longdouble#
+ * #is_bool2 = 1, 0*17#
+ * #is_half2 = 0*11, 1, 0*6#
+ * #is_float2 = 0*12, 1, 0, 0, 1, 0, 0#
+ * #is_double2 = 0*13, 1, 0, 0, 1, 0#
+ * #is_complex2 = 0*15, 1*3#
+ */
+
+/**begin repeat2
+ * #prefix = _aligned,,_aligned_contig,_contig#
+ * #aligned = 1,0,1,0#
+ * #contig = 0,0,1,1#
+ */
+
+/* The unaligned variants are skipped when NPY_USE_UNALIGNED_ACCESS is set */
+#if !(NPY_USE_UNALIGNED_ACCESS && !@aligned@)
+
+/* For half types, don't use actual double/float types in conversion */
+#if @is_half1@ || @is_half2@
+#  if @is_float1@
+#    define _TYPE1 npy_uint32
+#  elif @is_double1@
+#    define _TYPE1 npy_uint64
+#  else
+#    define _TYPE1 npy_@rname1@
+#  endif
+#  if @is_float2@
+#    define _TYPE2 npy_uint32
+#  elif @is_double2@
+#    define _TYPE2 npy_uint64
+#  else
+#    define _TYPE2 npy_@rname2@
+#  endif
+#else
+#  define _TYPE1 npy_@rname1@
+#  define _TYPE2 npy_@rname2@
+#endif
+
+/* Element conversion; a bool destination is a truth test, not a cast */
+#if @is_half1@
+#  if @is_float2@
+#    define _CONVERT_FN(x) npy_halfbits_to_floatbits(x)
+#  elif @is_double2@
+#    define _CONVERT_FN(x) npy_halfbits_to_doublebits(x)
+#  elif @is_half2@
+#    define _CONVERT_FN(x) (x)
+#  elif @is_bool2@
+#    define _CONVERT_FN(x) ((npy_bool)(npy_half_to_float(x) != 0))
+#  else
+#    define _CONVERT_FN(x) ((_TYPE2)npy_half_to_float(x))
+#  endif
+#elif @is_half2@
+#  if @is_float1@
+#    define _CONVERT_FN(x) npy_floatbits_to_halfbits(x)
+#  elif @is_double1@
+#    define _CONVERT_FN(x) npy_doublebits_to_halfbits(x)
+#  else
+#    define _CONVERT_FN(x) npy_float_to_half((float)(x))
+#  endif
+#elif @is_bool2@
+#  define _CONVERT_FN(x) ((npy_bool)((x) != 0))
+#else
+#  define _CONVERT_FN(x) ((_TYPE2)(x))
+#endif
+
+/* Complex -> real keeps the real part; complex -> bool tests both parts */
+#if @is_bool2@
+#  define _CONVERT_REAL_FN(v) (_CONVERT_FN((v)[0]) || _CONVERT_FN((v)[1]))
+#else
+#  define _CONVERT_REAL_FN(v) _CONVERT_FN((v)[0])
+#endif
+
+/* Cast N elements from src to dst; contig variants ignore the strides */
+static void
+@prefix@_cast_@name1@_to_@name2@(
+                        char *dst, npy_intp dst_stride,
+                        char *src, npy_intp src_stride,
+                        npy_intp N, npy_intp NPY_UNUSED(src_itemsize),
+                        void *NPY_UNUSED(data))
+{
+#if @is_complex1@
+    _TYPE1 src_value[2];
+#elif !@aligned@
+    _TYPE1 src_value;
+#endif
+#if @is_complex2@
+    _TYPE2 dst_value[2];
+#elif !@aligned@
+    _TYPE2 dst_value;
+#endif
+
+    while (N--) {
+/* Load the source; aligned non-complex values are read in place below */
+#if @aligned@
+#  if @is_complex1@
+        src_value[0] = ((_TYPE1 *)src)[0];
+        src_value[1] = ((_TYPE1 *)src)[1];
+#  endif
+#else
+        memcpy(&src_value, src, sizeof(src_value));
+#endif
+
+/* Do the cast */
+#if @is_complex1@
+#  if @is_complex2@
+    dst_value[0] = _CONVERT_FN(src_value[0]);
+    dst_value[1] = _CONVERT_FN(src_value[1]);
+#  elif !@aligned@
+    dst_value = _CONVERT_REAL_FN(src_value);
+#  else
+    *(_TYPE2 *)dst = _CONVERT_REAL_FN(src_value);
+#  endif
+#else
+#  if @is_complex2@
+#    if !@aligned@
+    dst_value[0] = _CONVERT_FN(src_value);
+#    else
+    dst_value[0] = _CONVERT_FN(*(_TYPE1 *)src);
+#    endif
+    dst_value[1] = 0;
+#  elif !@aligned@
+    dst_value = _CONVERT_FN(src_value);
+#  else
+    *(_TYPE2 *)dst = _CONVERT_FN(*(_TYPE1 *)src);
+#  endif
+#endif
+
+/* Store the result; aligned non-complex values were written above */
+#if @aligned@
+#  if @is_complex2@
+        ((_TYPE2 *)dst)[0] = dst_value[0];
+        ((_TYPE2 *)dst)[1] = dst_value[1];
+#  endif
+#else
+        memcpy(dst, &dst_value, sizeof(dst_value));
+#endif
+
+#if @contig@
+        dst += sizeof(npy_@name2@);
+        src += sizeof(npy_@name1@);
+#else
+        dst += dst_stride;
+        src += src_stride;
+#endif
+    }
+}
+
+#undef _CONVERT_REAL_FN
+#undef _CONVERT_FN
+#undef _TYPE2
+#undef _TYPE1
+
+#endif
+
+/**end repeat2**/
+
+/**end repeat1**/
+
+/**end repeat**/
+
+/* Select the cast loop for a type pair; NULL if either is unsupported */
+NPY_NO_EXPORT PyArray_StridedTransferFn *
+PyArray_GetStridedNumericCastFn(npy_intp aligned, npy_intp src_stride,
+                    npy_intp dst_stride, int src_type_num, int dst_type_num)
+{
+    switch (src_type_num) {
+/**begin repeat
+ *
+ * #NAME1 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name1 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ */
+
+        case NPY_@NAME1@:
+            /* Dispatch on the destination type for this source type */
+            switch (dst_type_num) {
+/**begin repeat1
+ *
+ * #NAME2 = BOOL,
+ *          UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *          BYTE, SHORT, INT, LONG, LONGLONG,
+ *          HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *          CFLOAT, CDOUBLE, CLONGDOUBLE#
+ * #name2 = bool,
+ *          ubyte, ushort, uint, ulong, ulonglong,
+ *          byte, short, int, long, longlong,
+ *          half, float, double, longdouble,
+ *          cfloat, cdouble, clongdouble#
+ */
+
+                case NPY_@NAME2@:
+                    /* Pick the loop variant by contiguity and alignment */
+#  if NPY_USE_UNALIGNED_ACCESS
+                    if (src_stride == sizeof(npy_@name1@) &&
+                                dst_stride == sizeof(npy_@name2@)) {
+                        return &_aligned_contig_cast_@name1@_to_@name2@;
+                    }
+                    else {
+                        return &_aligned_cast_@name1@_to_@name2@;
+                    }
+#  else
+                    if (src_stride == sizeof(npy_@name1@) &&
+                                dst_stride == sizeof(npy_@name2@)) {
+                        return aligned ?
+                                    &_aligned_contig_cast_@name1@_to_@name2@ :
+                                    &_contig_cast_@name1@_to_@name2@;
+                    }
+                    else {
+                        return aligned ? &_aligned_cast_@name1@_to_@name2@ :
+                                         &_cast_@name1@_to_@name2@;
+                    }
+#  endif
+
+/**end repeat1**/
+            }
+            break;
+
+/**end repeat**/
+    }
+
+    return NULL;
+}
+
+
+/************** STRIDED TRANSFER FUNCTION MEMORY MANAGEMENT **************/
+
 typedef void (*_npy_stridedtransfer_dealloc)(void *);
 NPY_NO_EXPORT void
 PyArray_FreeStridedTransferData(void *transferdata)
@@ -680,6 +955,7 @@ PyArray_CopyStridedTransferData(void *transferdata)
     return NULL;
 }
 
+/****************** PRIMITIVE FLAT TO/FROM NDIM FUNCTIONS ******************/
 
 NPY_NO_EXPORT npy_intp
 PyArray_TransferNDimToStrided(npy_intp ndim,
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.h b/numpy/core/src/multiarray/lowlevel_strided_loops.h
index e3427e3c6..34cdf3c39 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.h
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.h
@@ -109,6 +109,17 @@ PyArray_GetStridedZeroPadCopyFn(int aligned,
                             void **outtransferdata);
 
 /*
+ * For casts between built-in numeric types,
+ * this produces a function pointer for casting from src_type_num
+ * to dst_type_num.  If a conversion is unsupported, returns NULL
+ * without setting a Python exception.
+ */
+NPY_NO_EXPORT PyArray_StridedTransferFn *
+PyArray_GetStridedNumericCastFn(npy_intp aligned, npy_intp src_stride,
+                             npy_intp dst_stride,
+                             int src_type_num, int dst_type_num);
+
+/*
  * If it's possible, gives back a transfer function which casts and/or
  * byte swaps data with the dtype 'src_dtype' into data with the dtype
  * 'dst_dtype'.  If the outtransferdata is populated with a non-NULL value,
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index d2d57d125..6a25e3a4c 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -3509,6 +3509,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
     /* This contains the all the inputs and outputs */
     PyArrayObject *op[NPY_MAXARGS];
     PyArray_Descr *dtype[NPY_MAXARGS];
+    PyArray_Descr *result_type;
 
     /* TODO: For 1.6, the default should probably be NPY_CORDER */
     NPY_ORDER order = NPY_KEEPORDER;
@@ -3535,6 +3536,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
         op[i] = NULL;
         dtype[i] = NULL;
     }
+    result_type = NULL;
 
     /* Get input arguments */
     for(i = 0; i < nin; ++i) {
@@ -3555,6 +3557,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
         op[i] = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, context);
 
         /* Start with a native byte-order data type */
+        /*
         if (PyArray_ISNBO(PyArray_DESCR(op[i])->byteorder)) {
             dtype[i] = PyArray_DESCR(op[i]);
             Py_INCREF(dtype[i]);
@@ -3563,6 +3566,7 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
             dtype[i] = PyArray_DescrNewByteorder(PyArray_DESCR(op[i]),
                                                             NPY_NATIVE);
         }
+        */
     }
 
     /* Get positional output arguments */
@@ -3670,13 +3674,42 @@ ufunc_generic_call_iter(PyUFuncObject *self, PyObject *args, PyObject *kwds)
             }
         }
     }
-    
-    return NULL;
+
+    /*
+     * Determine the result type.  A better approach would be for
+     * the ufunc to provide a function which gives back the result
+     * type and inner loop function.  The default would work as follows.
+     */
+    result_type = PyArray_ResultType(nin, op, 0, NULL);
+    if (result_type == NULL) {
+        goto fail;
+    }
+    /* Take into account the types of any output parameters */
+    for (i = nin; i < nargs; ++i) {
+        PyArray_Descr *tmp;
+        if (op[i] != NULL) {
+            tmp = PyArray_PromoteTypes(result_type, PyArray_DESCR(op[i]));
+            if (tmp == NULL) {
+                goto fail;
+            }
+            Py_DECREF(result_type);
+            result_type = tmp;
+        }
+    }
+
+    /* Find the function loop */
+
+    printf("result type: ");
+    PyObject_Print((PyObject *)result_type, stdout, 0);
+    printf("\n");
+
+    PyErr_SetString(PyExc_RuntimeError, "implementation is not finished!");
 fail:
     for (i = 0; i < niter; ++i) {
         Py_XDECREF(op[i]);
         Py_XDECREF(dtype[i]);
     }
+    Py_XDECREF(result_type);
     return NULL;
 }
author	Mark Wiebe <mwwiebe@gmail.com>	2011-01-19 12:31:46 -0800
committer	Mark Wiebe <mwwiebe@gmail.com>	2011-01-19 12:55:33 -0800
commit	a1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e (patch)
tree	c7da6bd334c58dec111201999b6951b441b92a55
parent	fe08a916cf275ecd21c1b32b22aa3b8d2ca36b33 (diff)
download	numpy-a1eee39c7adfc9ec8cc807b8d580ae8ca82cee1e.tar.gz