BUG: optimizing compilers can reorder call to npy_get_floatstatus (#11036)

* BUG: optimizing compilers can reorder call to npy_get_floatstatus * alternative fix for npy_get_floatstatus, npy_clear_floatstatus * unify test with pr #11043 * use barrier form of functions in place of PyUFunc_{get,clear}fperr * update doc, prevent segfault * MAINT: Do some rewrite on the 1.15.0 release notes. [ci skip]
author: Matti Picus <matti.picus@gmail.com> 2018-05-10 04:50:23 +0300
committer: Charles Harris <charlesr.harris@gmail.com> 2018-05-09 19:50:23 -0600
commit: f5758d6fe15c2b506290bfc5379a10027617b331 (patch)
tree: 6b7d55f82640729cdd502aecac681a83795b0f9f /numpy/core
parent: 3ef81bf7d8864beb53c8873bcdb78b4a7e90600e (diff)
download: numpy-f5758d6fe15c2b506290bfc5379a10027617b331.tar.gz
9 files changed, 101 insertions, 41 deletions
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index ba32bcdd3..55e0fbc79 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -524,8 +524,14 @@ npy_clongdouble npy_catanhl(npy_clongdouble z);
 #define NPY_FPE_UNDERFLOW     4
 #define NPY_FPE_INVALID       8
 
-int npy_get_floatstatus(void);
+int npy_clear_floatstatus_barrier(char*);
+int npy_get_floatstatus_barrier(char*);
+/*
+ * use caution with these - clang and gcc8.1 are known to reorder calls
+ * to this form of the function which can defeat the check
+ */
 int npy_clear_floatstatus(void);
+int npy_get_floatstatus(void);
 void npy_set_floatstatus_divbyzero(void);
 void npy_set_floatstatus_overflow(void);
 void npy_set_floatstatus_underflow(void);
diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src
index bca690b4d..5405c8fe3 100644
--- a/numpy/core/src/npymath/ieee754.c.src
+++ b/numpy/core/src/npymath/ieee754.c.src
@@ -6,6 +6,7 @@
  */
 #include "npy_math_common.h"
 #include "npy_math_private.h"
+#include "numpy/utils.h"
 
 #ifndef HAVE_COPYSIGN
 double npy_copysign(double x, double y)
@@ -557,6 +558,15 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
 }
 #endif
 
+int npy_clear_floatstatus() {
+    char x=0;
+    return npy_clear_floatstatus_barrier(&x);
+}
+int npy_get_floatstatus() {
+    char x=0;
+    return npy_get_floatstatus_barrier(&x);
+}
+
 /*
  * Functions to set the floating point status word.
  * keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
@@ -574,18 +584,24 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
     defined(__NetBSD__)
 #include <ieeefp.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char * param))
 {
     int fpstatus = fpgetsticky();
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return ((FP_X_DZ  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
            ((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
            ((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
 {
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     fpsetsticky(0);
 
     return fpstatus;
@@ -617,10 +633,16 @@ void npy_set_floatstatus_invalid(void)
       (defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
 #  include <fenv.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char* param)
 {
     int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
                                 FE_UNDERFLOW | FE_INVALID);
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
 
     return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
@@ -628,10 +650,10 @@ int npy_get_floatstatus(void)
            ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
 {
     /* testing float status is 50-100 times faster than clearing on x86 */
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     if (fpstatus != 0) {
         feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
                       FE_UNDERFLOW | FE_INVALID);
@@ -665,18 +687,24 @@ void npy_set_floatstatus_invalid(void)
 #include <float.h>
 #include <fpxcp.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
 {
     int fpstatus = fp_read_flag();
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
            ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
            ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
 {
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     fp_swap_flag(0);
 
     return fpstatus;
@@ -710,8 +738,11 @@ void npy_set_floatstatus_invalid(void)
 #include <float.h>
 
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
 {
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
 #if defined(_WIN64)
     int fpstatus = _statusfp();
 #else
@@ -720,15 +751,18 @@ int npy_get_floatstatus(void)
     _statusfp2(&fpstatus, &fpstatus2);
     fpstatus |= fpstatus2;
 #endif
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
            ((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
            ((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
            ((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
 {
-    int fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     _clearfp();
 
     return fpstatus;
@@ -739,18 +773,24 @@ int npy_clear_floatstatus(void)
 
 #include <machine/fpu.h>
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
 {
     unsigned long fpstatus = ieee_get_fp_control();
+    /*
+     * By using a volatile, the compiler cannot reorder this call
+     */
+    if (param != NULL) {
+        volatile char NPY_UNUSED(c) = *(char*)param;
+    }
     return  ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
             ((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
             ((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
             ((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0);
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
 {
-    long fpstatus = npy_get_floatstatus();
+    int fpstatus = npy_get_floatstatus_barrier(param);
     /* clear status bits as well as disable exception mode if on */
     ieee_set_fp_control(0);
 
@@ -759,13 +799,14 @@ int npy_clear_floatstatus(void)
 
 #else
 
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char NPY_UNUSED(*param))
 {
     return 0;
 }
 
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
 {
+    int fpstatus = npy_get_floatstatus_barrier(param);
     return 0;
 }
 
diff --git a/numpy/core/src/umath/extobj.c b/numpy/core/src/umath/extobj.c
index e44036358..188054e22 100644
--- a/numpy/core/src/umath/extobj.c
+++ b/numpy/core/src/umath/extobj.c
@@ -284,7 +284,7 @@ _check_ufunc_fperr(int errmask, PyObject *extobj, const char *ufunc_name) {
     if (!errmask) {
         return 0;
     }
-    fperr = PyUFunc_getfperr();
+    fperr = npy_get_floatstatus_barrier((char*)extobj);
     if (!fperr) {
         return 0;
     }
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 8b1c7e703..4faa068c8 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1819,7 +1819,7 @@ NPY_NO_EXPORT void
             *((npy_bool *)op1) = @func@(in1) != 0;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -1901,7 +1901,7 @@ NPY_NO_EXPORT void
             *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -1991,7 +1991,7 @@ NPY_NO_EXPORT void
             *((@type@ *)op1) = tmp + 0;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 
 NPY_NO_EXPORT void
@@ -2177,7 +2177,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
         const npy_half in1 = *(npy_half *)ip1;
         *((npy_bool *)op1) = @func@(in1) != 0;
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat**/
 
@@ -2239,7 +2239,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
         const npy_half in2 = *(npy_half *)ip2;
         *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2;
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat**/
 
@@ -2681,7 +2681,7 @@ NPY_NO_EXPORT void
         const @ftype@ in1i = ((@ftype@ *)ip1)[1];
         *((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i);
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
@@ -2790,7 +2790,7 @@ NPY_NO_EXPORT void
             ((@ftype@ *)op1)[1] = in2i;
         }
     }
-    npy_clear_floatstatus();
+    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 /**end repeat1**/
 
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 681d3fefa..5c3a84e21 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -537,7 +537,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
     }
 
     /* Start with the floating-point exception flags cleared */
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&iter);
 
     if (NpyIter_GetIterSize(iter) != 0) {
         NpyIter_IterNextFunc *iternext;
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index 6e1fb1ee8..3e29c4b4e 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -848,7 +848,7 @@ static PyObject *
     }
 
 #if @fperr@
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&out);
 #endif
 
     /*
@@ -863,7 +863,7 @@ static PyObject *
 
 #if @fperr@
     /* Check status flag.  If it is set, then look up what to do */
-    retstatus = PyUFunc_getfperr();
+    retstatus = npy_get_floatstatus_barrier((char*)&out);
     if (retstatus) {
         int bufsize, errmask;
         PyObject *errobj;
@@ -993,7 +993,7 @@ static PyObject *
         return Py_NotImplemented;
     }
 
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&out);
 
     /*
      * here we do the actual calculation with arg1 and arg2
@@ -1008,7 +1008,7 @@ static PyObject *
     }
 
     /* Check status flag.  If it is set, then look up what to do */
-    retstatus = PyUFunc_getfperr();
+    retstatus = npy_get_floatstatus_barrier((char*)&out);
     if (retstatus) {
         int bufsize, errmask;
         PyObject *errobj;
@@ -1072,7 +1072,7 @@ static PyObject *
         return Py_NotImplemented;
     }
 
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&out);
 
     /*
      * here we do the actual calculation with arg1 and arg2
@@ -1136,7 +1136,7 @@ static PyObject *
         return Py_NotImplemented;
     }
 
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&out);
 
     /*
      * here we do the actual calculation with arg1 and arg2
@@ -1150,7 +1150,7 @@ static PyObject *
     }
 
     /* Check status flag.  If it is set, then look up what to do */
-    retstatus = PyUFunc_getfperr();
+    retstatus = npy_get_floatstatus_barrier((char*)&out);
     if (retstatus) {
         int bufsize, errmask;
         PyObject *errobj;
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 2241414ac..2fae072ee 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1031,7 +1031,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
         i += 2 * stride;
 
         /* minps/minpd will set invalid flag if nan is encountered */
-        npy_clear_floatstatus();
+        npy_clear_floatstatus_barrier((char*)&c1);
         LOOP_BLOCKED(@type@, 32) {
             @vtype@ v1 = @vpre@_load_@vsuf@((@type@*)&ip[i]);
             @vtype@ v2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]);
@@ -1040,7 +1040,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
         }
         c1 = @vpre@_@VOP@_@vsuf@(c1, c2);
 
-        if (npy_get_floatstatus() & NPY_FPE_INVALID) {
+        if (npy_get_floatstatus_barrier((char*)&c1) & NPY_FPE_INVALID) {
             *op = @nan@;
         }
         else {
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 072c0f66e..84a329475 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -100,7 +100,8 @@ PyUFunc_getfperr(void)
      * non-clearing get was only added in 1.9 so this function always cleared
      * keep it so just in case third party code relied on the clearing
      */
-    return npy_clear_floatstatus();
+    char param = 0;
+    return npy_clear_floatstatus_barrier(&param);
 }
 
 #define HANDLEIT(NAME, str) {if (retstatus & NPY_FPE_##NAME) {          \
@@ -133,7 +134,8 @@ NPY_NO_EXPORT int
 PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
 {
     /* clearing is done for backward compatibility */
-    int retstatus = npy_clear_floatstatus();
+    int retstatus;
+    retstatus = npy_clear_floatstatus_barrier((char*)&retstatus);
 
     return PyUFunc_handlefperr(errmask, errobj, retstatus, first);
 }
@@ -144,7 +146,8 @@ PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
 NPY_NO_EXPORT void
 PyUFunc_clearfperr()
 {
-    npy_clear_floatstatus();
+    char param = 0;
+    npy_clear_floatstatus_barrier(&param);
 }
 
 /*
@@ -2537,7 +2540,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
 #endif
 
     /* Start with the floating-point exception flags cleared */
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&iter);
 
     NPY_UF_DBG_PRINT("Executing inner loop\n");
 
@@ -2782,7 +2785,7 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
     }
 
     /* Start with the floating-point exception flags cleared */
-    PyUFunc_clearfperr();
+    npy_clear_floatstatus_barrier((char*)&ufunc);
 
     /* Do the ufunc loop */
     if (need_fancy) {
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index ea0be1892..f12754a33 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -1328,6 +1328,16 @@ class TestMinMax(object):
         assert_equal(d.max(), d[0])
         assert_equal(d.min(), d[0])
 
+    def test_reduce_warns(self):
+        # gh 10370 Some compilers reorder the call to npy_getfloatstatus and
+        # put it before the call to an intrisic function that causes invalid
+        # status to be set
+        for n in (2, 4, 8, 16, 32):
+            with suppress_warnings() as sup:
+                sup.record(RuntimeWarning)
+                for r in np.diagflat([np.nan] * n):
+                    assert_equal(np.min(r), np.nan)
+
 
 class TestAbsoluteNegative(object):
     def test_abs_neg_blocked(self):
author	Matti Picus <matti.picus@gmail.com>	2018-05-10 04:50:23 +0300
committer	Charles Harris <charlesr.harris@gmail.com>	2018-05-09 19:50:23 -0600
commit	f5758d6fe15c2b506290bfc5379a10027617b331 (patch)
tree	6b7d55f82640729cdd502aecac681a83795b0f9f /numpy/core
parent	3ef81bf7d8864beb53c8873bcdb78b4a7e90600e (diff)
download	numpy-f5758d6fe15c2b506290bfc5379a10027617b331.tar.gz