Move complex multiply and divide from the generic interface to the specific interface --- gives about a 10% speed increase.

author: Travis Oliphant <oliphant@enthought.com> 2006-08-23 19:56:28 +0000
committer: Travis Oliphant <oliphant@enthought.com> 2006-08-23 19:56:28 +0000
commit: eb334fc4722b27cdad024784dc3246c80febfb35 (patch)
tree: 4cd6ad0e2dbd8fdc419b5f589630fd35f5d841aa
parent: 6bda06633124354dbd0e85caac0b70fed2ddd0ab (diff)
download: numpy-eb334fc4722b27cdad024784dc3246c80febfb35.tar.gz
4 files changed, 154 insertions, 30 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index d23245324..60feda3c6 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -101,6 +101,7 @@ allM = bints+flts+cmplxM
 nobool = all[1:]
 nobool_or_obj = all[1:-1]
 intflt = ints+flts
+intfltcmplx = nobool_or_obj
 nocmplx = bints+flts
 nocmplxO = nocmplx+O
 nocmplxM = nocmplx+M
@@ -122,22 +123,19 @@ defdict = {
 'multiply' :
     Ufunc(2, 1, One,
           'multiplies the arguments elementwise.',
-          TD(nocmplx),
-          TD(cmplx, f='prod'),
+          TD(noobj),
           TD(O, f='PyNumber_Multiply'),
           ),
 'divide' :
     Ufunc(2, 1, One,
           'divides the arguments elementwise.',
-          TD(intflt),
-          TD(cmplx, f='quot'),
+          TD(intfltcmplx),
           TD(O, f='PyNumber_Divide'),
           ),
 'floor_divide' :
     Ufunc(2, 1, One,
           'floor divides the arguments elementwise.',
-          TD(intflt),
-          TD(cmplx, f='floor_quot'),
+          TD(intfltcmplx),
           TD(O, f='PyNumber_FloorDivide'),
           ),
 'true_divide' :
@@ -145,8 +143,7 @@ defdict = {
           'true divides the arguments elementwise.',
           TD('bBhH', out='f'),
           TD('iIlLqQ', out='d'),
-          TD(flts),
-          TD(cmplx, f='quot'),
+          TD(flts+cmplx),
           TD(O, f='PyNumber_TrueDivide'),
           ),
 'conjugate' :
diff --git a/numpy/core/include/numpy/arrayobject.h b/numpy/core/include/numpy/arrayobject.h
index e258eb01d..f64d2a6c3 100644
--- a/numpy/core/include/numpy/arrayobject.h
+++ b/numpy/core/include/numpy/arrayobject.h
@@ -16,8 +16,6 @@
 #include "noprefix.h"
 #endif
 
-#ifndef NPY_NO_SIGNAL
 #include "npy_interrupt.h"
-#endif
 
 #endif
diff --git a/numpy/core/include/numpy/npy_interrupt.h b/numpy/core/include/numpy/npy_interrupt.h
index 1e4257d56..0fd94cf34 100644
--- a/numpy/core/include/numpy/npy_interrupt.h
+++ b/numpy/core/include/numpy/npy_interrupt.h
@@ -1,14 +1,88 @@
 
 /* Signal handling: 
 
-In your C-extension:  
+This header file defines macros that allow your code to handle
+interrupts received during processing.  Interrupts that 
+could reasonably be handled:
 
-Around a block of code you want to be interruptable 
+SIGINT, SIGABRT, SIGALRM, SIGSEGV
 
-NPY_SIG_ON
+****Warning***************
+
+Do not allow code that creates temporary memory or increases reference
+counts of Python objects to be interrupted unless you handle decrementing
+the reference counts and freeing any allocated memory in the clean-up code.
+
+**************************
+
+The mechanism for handling interrupts is conceptually simple:
+
+  - replace the signal handler with our own home-grown version
+     and store the old one.  
+  - run the code to be interrupted -- if an interrupt occurs
+     the handler should basically just cause a return to the
+     calling function for clean-up work. 
+  - restore the old signal handler 
+
+Of course, any code that allows interrupts must account for
+returning via the interrupt and handle clean-up correctly.  But,
+even still, the simple paradigm is complicated by at least three
+factors.
+
+ 1) platform portability (i.e. Microsoft says not to use longjmp
+     to return from signal handling.  They have a __try  and __except 
+     extension to C instead but what about mingw?).
+ 2) how to handle threads
+     a) apparently whether signals are delivered to every thread of
+        the process or the "invoking" thread is platform dependent. 
+     b) if we use global variables to save state, then how is this
+        to be done in a thread-safe way.
+ 3) A general-purpose facility must allow for the possibility of
+    re-entrance (i.e. during execution of the code that is allowed
+    to interrupt, we might call back into this very section of code
+    serially). 
+
+Ideas:
+
+ 1) Start by implementing an approach that works on platforms that
+    can use setjmp and longjmp functionality and does nothing 
+    on other platforms.  Initially only catch SIGINT.
+
+ 2) Handle threads by storing global information in a linked-list
+    with a process-id key.  Then use a call-back function that longjmps
+    only to the correct buffer.
+
+ 3) Store a local copy of the global information and restore it on clean-up
+    so that re-entrance works. 
+
+
+Interface:
+
+In your C extension, around a block of code you want to be interruptible:
+
+NPY_SIG_TRY {
+[code]
+}
+NPY_SIG_EXCEPT(sigval) {  
+[signal return]
+}
+NPY_SIG_ELSE 
+[normal return]
+
+sigval is a local variable that will receive what
+signal was received.  You can use it to perform different
+actions based on the signal received. 
+
+Default actions (setting of specific Python errors)
+can be obtained with
+
+NPY_SIG_TRY {
 [code]
-NPY_SIG_OFF
+NPY_SIG_EXCEPT_GOTO(label)
+[normal return]
 
+label:
+  [error return]
 */
 
 /* Add signal handling macros */
@@ -16,9 +90,16 @@ NPY_SIG_OFF
 #ifndef NPY_INTERRUPT_H
 #define NPY_INTERRUPT_H
 
+#ifdef NPY_NO_SIGNAL
+
+#define NPY_SIG_ON
+#define NPY_SIG_OFF
+
+#else
+
 #define NPY_SIG_ON
 #define NPY_SIG_OFF
-#define NPY_SIG_CHECK
 
+#endif /* NPY_NO_SIGNAL */
 
 #endif /* NPY_INTERRUPT_H */
diff --git a/numpy/core/src/umathmodule.c.src b/numpy/core/src/umathmodule.c.src
index 79488b5db..ac88859b3 100644
--- a/numpy/core/src/umathmodule.c.src
+++ b/numpy/core/src/umathmodule.c.src
@@ -593,16 +593,6 @@ nc_quot@c@(c@typ@ *a, c@typ@ *b, c@typ@ *r)
 }
 
 static void
-nc_floor_quot@c@(c@typ@ *a, c@typ@ *b, c@typ@ *r)
-{
-	register @typ@ ar=a->real, br=b->real, ai=a->imag, bi=b->imag;
-	register @typ@ d = br*br + bi*bi;
-	r->real = floor@c@((ar*br + ai*bi)/d);
-	r->imag = 0;
-	return;
-}
-
-static void
 nc_sqrt@c@(c@typ@ *x, c@typ@ *r)
 {
 	@typ@ s,d;
@@ -979,8 +969,67 @@ static void
 }
 /**end repeat**/
 
+
 /**begin repeat
+#TYP= CFLOAT, CDOUBLE, CLONGDOUBLE#
+#typ= float, double, longdouble#
+#c=f,,l#
+*/
+static void
+@TYP@_multiply(char **args, intp *dimensions, intp *steps, void *func)
+{
+	register intp i;
+	intp is1=steps[0], is2=steps[1], os=steps[2], n=dimensions[0];
+	char *i1=args[0], *i2=args[1], *op=args[2];
+	for (i=0; i<n; i++, i1+=is1, i2+=is2, op+=os) {
+                register @typ@ ar=((c@typ@ *)i1)->real, \
+                        ai=((c@typ@ *)i1)->imag,        \
+                        br=((c@typ@ *)i2)->real,        \
+                        bi=((c@typ@ *)i2)->imag;
+                ((c@typ@ *)op)->real = ar*br - ai*bi;
+                ((c@typ@ *)op)->imag = ar*bi + ai*br;
+	}
+}
 
+static void
+@TYP@_divide(char **args, intp *dimensions, intp *steps, void *func)
+{
+	register intp i;
+	intp is1=steps[0], is2=steps[1], os=steps[2], n=dimensions[0];
+	char *i1=args[0], *i2=args[1], *op=args[2];
+	for (i=0; i<n; i++, i1+=is1, i2+=is2, op+=os) {
+                register @typ@ ar=((c@typ@ *)i1)->real, \
+                        ai=((c@typ@ *)i1)->imag,        \
+                        br=((c@typ@ *)i2)->real,        \
+                        bi=((c@typ@ *)i2)->imag;
+                register @typ@ d = br*br + bi*bi;
+                ((c@typ@ *)op)->real = (ar*br + ai*bi)/d;
+                ((c@typ@ *)op)->imag = (ai*br - ar*bi)/d;
+        }
+}
+
+static void
+@TYP@_floor_divide(char **args, intp *dimensions, intp *steps, void *func)
+{
+	register intp i;
+        intp is1=steps[0],is2=steps[1],os=steps[2],n=dimensions[0];
+	char *i1=args[0], *i2=args[1], *op=args[2];
+	for(i=0; i<n; i++, i1+=is1, i2+=is2, op+=os) {
+                register @typ@ ar=((c@typ@ *)i1)->real,    \
+                        ai=((c@typ@ *)i1)->imag,           \
+                        br=((c@typ@ *)i2)->real,           \
+                        bi=((c@typ@ *)i2)->imag;
+                register @typ@ d = br*br + bi*bi;
+                ((c@typ@ *)op)->real = floor@c@((ar*br + ai*bi)/d);
+                ((c@typ@ *)op)->imag = 0;
+	}
+}
+
+#define @TYP@_true_divide @TYP@_divide
+/**end repeat**/
+
+
+/**begin repeat
 #TYP=BYTE,UBYTE,SHORT,USHORT,INT,UINT,LONG,ULONG,LONGLONG,ULONGLONG#
 #typ=char, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong#
 #otyp=float*4, double*6#
@@ -1020,13 +1069,11 @@ static void
 /**end repeat**/
 
 /**begin repeat
-
-#TYP=(FLOAT,DOUBLE,LONGDOUBLE)*2#
-#typ=(float,double,longdouble)*2#
-#kind=divide*3, true_divide*3#
+#TYP=FLOAT,DOUBLE,LONGDOUBLE#
+#typ=float,double,longdouble#
 */
 static void
-@TYP@_@kind@(char **args, intp *dimensions, intp *steps, void *func)
+@TYP@_divide(char **args, intp *dimensions, intp *steps, void *func)
 {
 	register intp i, is1=steps[0],is2=steps[1],os=steps[2],n=dimensions[0];
 	char *i1=args[0], *i2=args[1], *op=args[2];
@@ -1034,6 +1081,7 @@ static void
 		*((@typ@ *)op)=*((@typ@ *)i1) / *((@typ@ *)i2);
 	}
 }
+#define @TYP@_true_divide @TYP@_divide
 /**end repeat**/
 
 /**begin repeat
author	Travis Oliphant <oliphant@enthought.com>	2006-08-23 19:56:28 +0000
committer	Travis Oliphant <oliphant@enthought.com>	2006-08-23 19:56:28 +0000
commit	eb334fc4722b27cdad024784dc3246c80febfb35 (patch)
tree	4cd6ad0e2dbd8fdc419b5f589630fd35f5d841aa
parent	6bda06633124354dbd0e85caac0b70fed2ddd0ab (diff)
download	numpy-eb334fc4722b27cdad024784dc3246c80febfb35.tar.gz