diff options
author | Matti Picus <matti.picus@gmail.com> | 2018-05-10 04:50:23 +0300 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2018-05-09 19:50:23 -0600 |
commit | f5758d6fe15c2b506290bfc5379a10027617b331 (patch) | |
tree | 6b7d55f82640729cdd502aecac681a83795b0f9f /numpy/core | |
parent | 3ef81bf7d8864beb53c8873bcdb78b4a7e90600e (diff) | |
download | numpy-f5758d6fe15c2b506290bfc5379a10027617b331.tar.gz |
BUG: optimizing compilers can reorder call to npy_get_floatstatus (#11036)
* BUG: optimizing compilers can reorder call to npy_get_floatstatus
* alternative fix for npy_get_floatstatus, npy_clear_floatstatus
* unify test with pr #11043
* use barrier form of functions in place of PyUFunc_{get,clear}fperr
* update doc, prevent segfault
* MAINT: Do some rewrite on the 1.15.0 release notes.
[ci skip]
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/include/numpy/npy_math.h | 8 | ||||
-rw-r--r-- | numpy/core/src/npymath/ieee754.c.src | 75 | ||||
-rw-r--r-- | numpy/core/src/umath/extobj.c | 2 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 14 | ||||
-rw-r--r-- | numpy/core/src/umath/reduction.c | 2 | ||||
-rw-r--r-- | numpy/core/src/umath/scalarmath.c.src | 14 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 4 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 13 | ||||
-rw-r--r-- | numpy/core/tests/test_umath.py | 10 |
9 files changed, 101 insertions, 41 deletions
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index ba32bcdd3..55e0fbc79 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -524,8 +524,14 @@ npy_clongdouble npy_catanhl(npy_clongdouble z); #define NPY_FPE_UNDERFLOW 4 #define NPY_FPE_INVALID 8 -int npy_get_floatstatus(void); +int npy_clear_floatstatus_barrier(char*); +int npy_get_floatstatus_barrier(char*); +/* + * use caution with these - clang and gcc8.1 are known to reorder calls + * to this form of the function which can defeat the check + */ int npy_clear_floatstatus(void); +int npy_get_floatstatus(void); void npy_set_floatstatus_divbyzero(void); void npy_set_floatstatus_overflow(void); void npy_set_floatstatus_underflow(void); diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src index bca690b4d..5405c8fe3 100644 --- a/numpy/core/src/npymath/ieee754.c.src +++ b/numpy/core/src/npymath/ieee754.c.src @@ -6,6 +6,7 @@ */ #include "npy_math_common.h" #include "npy_math_private.h" +#include "numpy/utils.h" #ifndef HAVE_COPYSIGN double npy_copysign(double x, double y) @@ -557,6 +558,15 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y) } #endif +int npy_clear_floatstatus() { + char x=0; + return npy_clear_floatstatus_barrier(&x); +} +int npy_get_floatstatus() { + char x=0; + return npy_get_floatstatus_barrier(&x); +} + /* * Functions to set the floating point status word. * keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h @@ -574,18 +584,24 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y) defined(__NetBSD__) #include <ieeefp.h> -int npy_get_floatstatus(void) +int npy_get_floatstatus_barrier(char * param)) { int fpstatus = fpgetsticky(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } return ((FP_X_DZ & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | ((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) | ((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | ((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0); } -int npy_clear_floatstatus(void) +int npy_clear_floatstatus_barrier(char * param) { - int fpstatus = npy_get_floatstatus(); + int fpstatus = npy_get_floatstatus_barrier(param); fpsetsticky(0); return fpstatus; @@ -617,10 +633,16 @@ void npy_set_floatstatus_invalid(void) (defined(__FreeBSD__) && (__FreeBSD_version >= 502114)) # include <fenv.h> -int npy_get_floatstatus(void) +int npy_get_floatstatus_barrier(char* param) { int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } return ((FE_DIVBYZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | ((FE_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | @@ -628,10 +650,10 @@ int npy_get_floatstatus(void) ((FE_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); } -int npy_clear_floatstatus(void) +int npy_clear_floatstatus_barrier(char * param) { /* testing float status is 50-100 times faster than clearing on x86 */ - int fpstatus = npy_get_floatstatus(); + int fpstatus = npy_get_floatstatus_barrier(param); if (fpstatus != 0) { feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); @@ -665,18 +687,24 @@ void npy_set_floatstatus_invalid(void) #include <float.h> #include <fpxcp.h> -int npy_get_floatstatus(void) +int npy_get_floatstatus_barrier(char *param) { int fpstatus = fp_read_flag(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); } -int npy_clear_floatstatus(void) +int npy_clear_floatstatus_barrier(char * param) { - int fpstatus = npy_get_floatstatus(); + int fpstatus = npy_get_floatstatus_barrier(param); fp_swap_flag(0); return fpstatus; @@ -710,8 +738,11 @@ void npy_set_floatstatus_invalid(void) #include <float.h> -int npy_get_floatstatus(void) +int npy_get_floatstatus_barrier(char *param) { + /* + * By using a volatile, the compiler cannot reorder this call + */ #if defined(_WIN64) int fpstatus = _statusfp(); #else @@ -720,15 +751,18 @@ int npy_get_floatstatus(void) _statusfp2(&fpstatus, &fpstatus2); fpstatus |= fpstatus2; #endif + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | ((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | ((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | ((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); } -int npy_clear_floatstatus(void) +int npy_clear_floatstatus_barrier(char *param) { - int fpstatus = npy_get_floatstatus(); + int fpstatus = npy_get_floatstatus_barrier(param); _clearfp(); return fpstatus; @@ -739,18 +773,24 @@ int npy_clear_floatstatus(void) #include <machine/fpu.h> -int npy_get_floatstatus(void) +int npy_get_floatstatus_barrier(char *param) { unsigned long fpstatus = ieee_get_fp_control(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } return ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | ((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) | ((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | ((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0); } -int npy_clear_floatstatus(void) +int npy_clear_floatstatus_barrier(char *param) { - long fpstatus = npy_get_floatstatus(); + int fpstatus = npy_get_floatstatus_barrier(param); /* clear status bits as well as disable exception mode if on */ ieee_set_fp_control(0); @@ -759,13 +799,14 @@ int npy_clear_floatstatus(void) #else -int npy_get_floatstatus(void) +int npy_get_floatstatus_barrier(char NPY_UNUSED(*param)) { return 0; } -int npy_clear_floatstatus(void) +int npy_clear_floatstatus_barrier(char *param) { + int fpstatus = npy_get_floatstatus_barrier(param); return 0; } diff --git a/numpy/core/src/umath/extobj.c b/numpy/core/src/umath/extobj.c index e44036358..188054e22 100644 --- a/numpy/core/src/umath/extobj.c +++ b/numpy/core/src/umath/extobj.c @@ -284,7 +284,7 @@ _check_ufunc_fperr(int errmask, PyObject *extobj, const char *ufunc_name) { if (!errmask) { return 0; } - fperr = PyUFunc_getfperr(); + fperr = npy_get_floatstatus_barrier((char*)extobj); if (!fperr) { return 0; } diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 8b1c7e703..4faa068c8 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1819,7 +1819,7 @@ NPY_NO_EXPORT void *((npy_bool *)op1) = @func@(in1) != 0; } } - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)dimensions); } /**end repeat1**/ @@ -1901,7 +1901,7 @@ NPY_NO_EXPORT void *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2; } } - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)dimensions); } /**end repeat1**/ @@ -1991,7 +1991,7 @@ NPY_NO_EXPORT void *((@type@ *)op1) = tmp + 0; } } - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)dimensions); } NPY_NO_EXPORT void @@ -2177,7 +2177,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED const npy_half in1 = *(npy_half *)ip1; *((npy_bool *)op1) = @func@(in1) != 0; } - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)dimensions); } /**end repeat**/ @@ -2239,7 +2239,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED const npy_half in2 = *(npy_half *)ip2; *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2; } - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)dimensions); } /**end repeat**/ @@ -2681,7 +2681,7 @@ NPY_NO_EXPORT void const @ftype@ in1i = ((@ftype@ *)ip1)[1]; *((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i); } - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)dimensions); } /**end repeat1**/ @@ -2790,7 +2790,7 @@ NPY_NO_EXPORT void ((@ftype@ *)op1)[1] = in2i; } } - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)dimensions); } /**end repeat1**/ diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 681d3fefa..5c3a84e21 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -537,7 +537,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, } /* Start with the floating-point exception flags cleared */ - PyUFunc_clearfperr(); + npy_clear_floatstatus_barrier((char*)&iter); if (NpyIter_GetIterSize(iter) != 0) { NpyIter_IterNextFunc *iternext; diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index 6e1fb1ee8..3e29c4b4e 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -848,7 +848,7 @@ static PyObject * } #if @fperr@ - PyUFunc_clearfperr(); + npy_clear_floatstatus_barrier((char*)&out); #endif /* @@ -863,7 +863,7 @@ static PyObject * #if @fperr@ /* Check status flag. If it is set, then look up what to do */ - retstatus = PyUFunc_getfperr(); + retstatus = npy_get_floatstatus_barrier((char*)&out); if (retstatus) { int bufsize, errmask; PyObject *errobj; @@ -993,7 +993,7 @@ static PyObject * return Py_NotImplemented; } - PyUFunc_clearfperr(); + npy_clear_floatstatus_barrier((char*)&out); /* * here we do the actual calculation with arg1 and arg2 @@ -1008,7 +1008,7 @@ static PyObject * } /* Check status flag. If it is set, then look up what to do */ - retstatus = PyUFunc_getfperr(); + retstatus = npy_get_floatstatus_barrier((char*)&out); if (retstatus) { int bufsize, errmask; PyObject *errobj; @@ -1072,7 +1072,7 @@ static PyObject * return Py_NotImplemented; } - PyUFunc_clearfperr(); + npy_clear_floatstatus_barrier((char*)&out); /* * here we do the actual calculation with arg1 and arg2 @@ -1136,7 +1136,7 @@ static PyObject * return Py_NotImplemented; } - PyUFunc_clearfperr(); + npy_clear_floatstatus_barrier((char*)&out); /* * here we do the actual calculation with arg1 and arg2 @@ -1150,7 +1150,7 @@ static PyObject * } /* Check status flag. If it is set, then look up what to do */ - retstatus = PyUFunc_getfperr(); + retstatus = npy_get_floatstatus_barrier((char*)&out); if (retstatus) { int bufsize, errmask; PyObject *errobj; diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 2241414ac..2fae072ee 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -1031,7 +1031,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) i += 2 * stride; /* minps/minpd will set invalid flag if nan is encountered */ - npy_clear_floatstatus(); + npy_clear_floatstatus_barrier((char*)&c1); LOOP_BLOCKED(@type@, 32) { @vtype@ v1 = @vpre@_load_@vsuf@((@type@*)&ip[i]); @vtype@ v2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]); @@ -1040,7 +1040,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) } c1 = @vpre@_@VOP@_@vsuf@(c1, c2); - if (npy_get_floatstatus() & NPY_FPE_INVALID) { + if (npy_get_floatstatus_barrier((char*)&c1) & NPY_FPE_INVALID) { *op = @nan@; } else { diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 072c0f66e..84a329475 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -100,7 +100,8 @@ PyUFunc_getfperr(void) * non-clearing get was only added in 1.9 so this function always cleared * keep it so just in case third party code relied on the clearing */ - return npy_clear_floatstatus(); + char param = 0; + return npy_clear_floatstatus_barrier(¶m); } #define HANDLEIT(NAME, str) {if (retstatus & NPY_FPE_##NAME) { \ @@ -133,7 +134,8 @@ NPY_NO_EXPORT int PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first) { /* clearing is done for backward compatibility */ - int retstatus = npy_clear_floatstatus(); + int retstatus; + retstatus = npy_clear_floatstatus_barrier((char*)&retstatus); return PyUFunc_handlefperr(errmask, errobj, retstatus, first); } @@ -144,7 +146,8 @@ PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first) NPY_NO_EXPORT void PyUFunc_clearfperr() { - npy_clear_floatstatus(); + char param = 0; + npy_clear_floatstatus_barrier(¶m); } /* @@ -2537,7 +2540,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, #endif /* Start with the floating-point exception flags cleared */ - PyUFunc_clearfperr(); + npy_clear_floatstatus_barrier((char*)&iter); NPY_UF_DBG_PRINT("Executing inner loop\n"); @@ -2782,7 +2785,7 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc, } /* Start with the floating-point exception flags cleared */ - PyUFunc_clearfperr(); + npy_clear_floatstatus_barrier((char*)&ufunc); /* Do the ufunc loop */ if (need_fancy) { diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index ea0be1892..f12754a33 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -1328,6 +1328,16 @@ class TestMinMax(object): assert_equal(d.max(), d[0]) assert_equal(d.min(), d[0]) + def test_reduce_warns(self): + # gh 10370 Some compilers reorder the call to npy_getfloatstatus and + # put it before the call to an intrisic function that causes invalid + # status to be set + for n in (2, 4, 8, 16, 32): + with suppress_warnings() as sup: + sup.record(RuntimeWarning) + for r in np.diagflat([np.nan] * n): + assert_equal(np.min(r), np.nan) + class TestAbsoluteNegative(object): def test_abs_neg_blocked(self): |