summaryrefslogtreecommitdiff
path: root/numpy/core
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2018-05-10 04:50:23 +0300
committerCharles Harris <charlesr.harris@gmail.com>2018-05-09 19:50:23 -0600
commitf5758d6fe15c2b506290bfc5379a10027617b331 (patch)
tree6b7d55f82640729cdd502aecac681a83795b0f9f /numpy/core
parent3ef81bf7d8864beb53c8873bcdb78b4a7e90600e (diff)
downloadnumpy-f5758d6fe15c2b506290bfc5379a10027617b331.tar.gz
BUG: optimizing compilers can reorder call to npy_get_floatstatus (#11036)
* BUG: optimizing compilers can reorder call to npy_get_floatstatus * alternative fix for npy_get_floatstatus, npy_clear_floatstatus * unify test with pr #11043 * use barrier form of functions in place of PyUFunc_{get,clear}fperr * update doc, prevent segfault * MAINT: Do some rewrite on the 1.15.0 release notes. [ci skip]
Diffstat (limited to 'numpy/core')
-rw-r--r--numpy/core/include/numpy/npy_math.h8
-rw-r--r--numpy/core/src/npymath/ieee754.c.src75
-rw-r--r--numpy/core/src/umath/extobj.c2
-rw-r--r--numpy/core/src/umath/loops.c.src14
-rw-r--r--numpy/core/src/umath/reduction.c2
-rw-r--r--numpy/core/src/umath/scalarmath.c.src14
-rw-r--r--numpy/core/src/umath/simd.inc.src4
-rw-r--r--numpy/core/src/umath/ufunc_object.c13
-rw-r--r--numpy/core/tests/test_umath.py10
9 files changed, 101 insertions, 41 deletions
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index ba32bcdd3..55e0fbc79 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -524,8 +524,14 @@ npy_clongdouble npy_catanhl(npy_clongdouble z);
#define NPY_FPE_UNDERFLOW 4
#define NPY_FPE_INVALID 8
-int npy_get_floatstatus(void);
+int npy_clear_floatstatus_barrier(char*);
+int npy_get_floatstatus_barrier(char*);
+/*
+ * use caution with these - clang and gcc8.1 are known to reorder calls
+ * to this form of the function which can defeat the check
+ */
int npy_clear_floatstatus(void);
+int npy_get_floatstatus(void);
void npy_set_floatstatus_divbyzero(void);
void npy_set_floatstatus_overflow(void);
void npy_set_floatstatus_underflow(void);
diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src
index bca690b4d..5405c8fe3 100644
--- a/numpy/core/src/npymath/ieee754.c.src
+++ b/numpy/core/src/npymath/ieee754.c.src
@@ -6,6 +6,7 @@
*/
#include "npy_math_common.h"
#include "npy_math_private.h"
+#include "numpy/utils.h"
#ifndef HAVE_COPYSIGN
double npy_copysign(double x, double y)
@@ -557,6 +558,15 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
}
#endif
+int npy_clear_floatstatus() {
+ char x=0;
+ return npy_clear_floatstatus_barrier(&x);
+}
+int npy_get_floatstatus() {
+ char x=0;
+ return npy_get_floatstatus_barrier(&x);
+}
+
/*
* Functions to set the floating point status word.
* keep in sync with NO_FLOATING_POINT_SUPPORT in ufuncobject.h
@@ -574,18 +584,24 @@ npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
defined(__NetBSD__)
#include <ieeefp.h>
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char * param))
{
int fpstatus = fpgetsticky();
+ /*
+ * By using a volatile, the compiler cannot reorder this call
+ */
+ if (param != NULL) {
+ volatile char NPY_UNUSED(c) = *(char*)param;
+ }
return ((FP_X_DZ & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0);
}
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
{
- int fpstatus = npy_get_floatstatus();
+ int fpstatus = npy_get_floatstatus_barrier(param);
fpsetsticky(0);
return fpstatus;
@@ -617,10 +633,16 @@ void npy_set_floatstatus_invalid(void)
(defined(__FreeBSD__) && (__FreeBSD_version >= 502114))
# include <fenv.h>
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char* param)
{
int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
FE_UNDERFLOW | FE_INVALID);
+ /*
+ * By using a volatile, the compiler cannot reorder this call
+ */
+ if (param != NULL) {
+ volatile char NPY_UNUSED(c) = *(char*)param;
+ }
return ((FE_DIVBYZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
((FE_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
@@ -628,10 +650,10 @@ int npy_get_floatstatus(void)
((FE_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
}
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
{
/* testing float status is 50-100 times faster than clearing on x86 */
- int fpstatus = npy_get_floatstatus();
+ int fpstatus = npy_get_floatstatus_barrier(param);
if (fpstatus != 0) {
feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
FE_UNDERFLOW | FE_INVALID);
@@ -665,18 +687,24 @@ void npy_set_floatstatus_invalid(void)
#include <float.h>
#include <fpxcp.h>
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
{
int fpstatus = fp_read_flag();
+ /*
+ * By using a volatile, the compiler cannot reorder this call
+ */
+ if (param != NULL) {
+ volatile char NPY_UNUSED(c) = *(char*)param;
+ }
return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
}
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char * param)
{
- int fpstatus = npy_get_floatstatus();
+ int fpstatus = npy_get_floatstatus_barrier(param);
fp_swap_flag(0);
return fpstatus;
@@ -710,8 +738,11 @@ void npy_set_floatstatus_invalid(void)
#include <float.h>
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
{
+ /*
+ * By using a volatile, the compiler cannot reorder this call
+ */
#if defined(_WIN64)
int fpstatus = _statusfp();
#else
@@ -720,15 +751,18 @@ int npy_get_floatstatus(void)
_statusfp2(&fpstatus, &fpstatus2);
fpstatus |= fpstatus2;
#endif
+ if (param != NULL) {
+ volatile char NPY_UNUSED(c) = *(char*)param;
+ }
return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
}
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
{
- int fpstatus = npy_get_floatstatus();
+ int fpstatus = npy_get_floatstatus_barrier(param);
_clearfp();
return fpstatus;
@@ -739,18 +773,24 @@ int npy_clear_floatstatus(void)
#include <machine/fpu.h>
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char *param)
{
unsigned long fpstatus = ieee_get_fp_control();
+ /*
+ * By using a volatile, the compiler cannot reorder this call
+ */
+ if (param != NULL) {
+ volatile char NPY_UNUSED(c) = *(char*)param;
+ }
return ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0);
}
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
{
- long fpstatus = npy_get_floatstatus();
+ int fpstatus = npy_get_floatstatus_barrier(param);
/* clear status bits as well as disable exception mode if on */
ieee_set_fp_control(0);
@@ -759,13 +799,14 @@ int npy_clear_floatstatus(void)
#else
-int npy_get_floatstatus(void)
+int npy_get_floatstatus_barrier(char NPY_UNUSED(*param))
{
return 0;
}
-int npy_clear_floatstatus(void)
+int npy_clear_floatstatus_barrier(char *param)
{
+ int fpstatus = npy_get_floatstatus_barrier(param);
return 0;
}
diff --git a/numpy/core/src/umath/extobj.c b/numpy/core/src/umath/extobj.c
index e44036358..188054e22 100644
--- a/numpy/core/src/umath/extobj.c
+++ b/numpy/core/src/umath/extobj.c
@@ -284,7 +284,7 @@ _check_ufunc_fperr(int errmask, PyObject *extobj, const char *ufunc_name) {
if (!errmask) {
return 0;
}
- fperr = PyUFunc_getfperr();
+ fperr = npy_get_floatstatus_barrier((char*)extobj);
if (!fperr) {
return 0;
}
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 8b1c7e703..4faa068c8 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1819,7 +1819,7 @@ NPY_NO_EXPORT void
*((npy_bool *)op1) = @func@(in1) != 0;
}
}
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat1**/
@@ -1901,7 +1901,7 @@ NPY_NO_EXPORT void
*((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
}
}
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat1**/
@@ -1991,7 +1991,7 @@ NPY_NO_EXPORT void
*((@type@ *)op1) = tmp + 0;
}
}
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)dimensions);
}
NPY_NO_EXPORT void
@@ -2177,7 +2177,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
const npy_half in1 = *(npy_half *)ip1;
*((npy_bool *)op1) = @func@(in1) != 0;
}
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat**/
@@ -2239,7 +2239,7 @@ HALF_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
const npy_half in2 = *(npy_half *)ip2;
*((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2;
}
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat**/
@@ -2681,7 +2681,7 @@ NPY_NO_EXPORT void
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
*((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i);
}
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat1**/
@@ -2790,7 +2790,7 @@ NPY_NO_EXPORT void
((@ftype@ *)op1)[1] = in2i;
}
}
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat1**/
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 681d3fefa..5c3a84e21 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -537,7 +537,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
}
/* Start with the floating-point exception flags cleared */
- PyUFunc_clearfperr();
+ npy_clear_floatstatus_barrier((char*)&iter);
if (NpyIter_GetIterSize(iter) != 0) {
NpyIter_IterNextFunc *iternext;
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index 6e1fb1ee8..3e29c4b4e 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -848,7 +848,7 @@ static PyObject *
}
#if @fperr@
- PyUFunc_clearfperr();
+ npy_clear_floatstatus_barrier((char*)&out);
#endif
/*
@@ -863,7 +863,7 @@ static PyObject *
#if @fperr@
/* Check status flag. If it is set, then look up what to do */
- retstatus = PyUFunc_getfperr();
+ retstatus = npy_get_floatstatus_barrier((char*)&out);
if (retstatus) {
int bufsize, errmask;
PyObject *errobj;
@@ -993,7 +993,7 @@ static PyObject *
return Py_NotImplemented;
}
- PyUFunc_clearfperr();
+ npy_clear_floatstatus_barrier((char*)&out);
/*
* here we do the actual calculation with arg1 and arg2
@@ -1008,7 +1008,7 @@ static PyObject *
}
/* Check status flag. If it is set, then look up what to do */
- retstatus = PyUFunc_getfperr();
+ retstatus = npy_get_floatstatus_barrier((char*)&out);
if (retstatus) {
int bufsize, errmask;
PyObject *errobj;
@@ -1072,7 +1072,7 @@ static PyObject *
return Py_NotImplemented;
}
- PyUFunc_clearfperr();
+ npy_clear_floatstatus_barrier((char*)&out);
/*
* here we do the actual calculation with arg1 and arg2
@@ -1136,7 +1136,7 @@ static PyObject *
return Py_NotImplemented;
}
- PyUFunc_clearfperr();
+ npy_clear_floatstatus_barrier((char*)&out);
/*
* here we do the actual calculation with arg1 and arg2
@@ -1150,7 +1150,7 @@ static PyObject *
}
/* Check status flag. If it is set, then look up what to do */
- retstatus = PyUFunc_getfperr();
+ retstatus = npy_get_floatstatus_barrier((char*)&out);
if (retstatus) {
int bufsize, errmask;
PyObject *errobj;
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 2241414ac..2fae072ee 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1031,7 +1031,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
i += 2 * stride;
/* minps/minpd will set invalid flag if nan is encountered */
- npy_clear_floatstatus();
+ npy_clear_floatstatus_barrier((char*)&c1);
LOOP_BLOCKED(@type@, 32) {
@vtype@ v1 = @vpre@_load_@vsuf@((@type@*)&ip[i]);
@vtype@ v2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]);
@@ -1040,7 +1040,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
}
c1 = @vpre@_@VOP@_@vsuf@(c1, c2);
- if (npy_get_floatstatus() & NPY_FPE_INVALID) {
+ if (npy_get_floatstatus_barrier((char*)&c1) & NPY_FPE_INVALID) {
*op = @nan@;
}
else {
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 072c0f66e..84a329475 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -100,7 +100,8 @@ PyUFunc_getfperr(void)
* non-clearing get was only added in 1.9 so this function always cleared
* keep it so just in case third party code relied on the clearing
*/
- return npy_clear_floatstatus();
+ char param = 0;
+ return npy_clear_floatstatus_barrier(&param);
}
#define HANDLEIT(NAME, str) {if (retstatus & NPY_FPE_##NAME) { \
@@ -133,7 +134,8 @@ NPY_NO_EXPORT int
PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
{
/* clearing is done for backward compatibility */
- int retstatus = npy_clear_floatstatus();
+ int retstatus;
+ retstatus = npy_clear_floatstatus_barrier((char*)&retstatus);
return PyUFunc_handlefperr(errmask, errobj, retstatus, first);
}
@@ -144,7 +146,8 @@ PyUFunc_checkfperr(int errmask, PyObject *errobj, int *first)
NPY_NO_EXPORT void
PyUFunc_clearfperr()
{
- npy_clear_floatstatus();
+ char param = 0;
+ npy_clear_floatstatus_barrier(&param);
}
/*
@@ -2537,7 +2540,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
#endif
/* Start with the floating-point exception flags cleared */
- PyUFunc_clearfperr();
+ npy_clear_floatstatus_barrier((char*)&iter);
NPY_UF_DBG_PRINT("Executing inner loop\n");
@@ -2782,7 +2785,7 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc,
}
/* Start with the floating-point exception flags cleared */
- PyUFunc_clearfperr();
+ npy_clear_floatstatus_barrier((char*)&ufunc);
/* Do the ufunc loop */
if (need_fancy) {
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index ea0be1892..f12754a33 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -1328,6 +1328,16 @@ class TestMinMax(object):
assert_equal(d.max(), d[0])
assert_equal(d.min(), d[0])
+ def test_reduce_warns(self):
+ # gh 10370 Some compilers reorder the call to npy_getfloatstatus and
+ # put it before the call to an intrisic function that causes invalid
+ # status to be set
+ for n in (2, 4, 8, 16, 32):
+ with suppress_warnings() as sup:
+ sup.record(RuntimeWarning)
+ for r in np.diagflat([np.nan] * n):
+ assert_equal(np.min(r), np.nan)
+
class TestAbsoluteNegative(object):
def test_abs_neg_blocked(self):