BUG: Fix rounding of denormals in double and float to half casts … (#12722)

* BUG: Fix rounding of denormals in double and float to half casts Previously the significand was shifted right to align denormals of different magnitude. This loses some bits that can make a difference for rounding. This is fixed: 1. For floats, by inspecting the original last bits when this may make a difference (should happen rarely) 2. For doubles by shifting the bits left to align the denromals and thus not lose the lowest orginal bits. * TST: Test half denormal rounding The test assumes that half to even is active, if this is ever changed or allowed to change, the test will fail. * Fixup: Fixup for halffloat.c The one code path cannot be used. The other must have been a typo, but is a valid bug, a new test for it in the next commit. * TST: half casting lower bits are not lost for denormal results The first test only tested the off by one, this one specifically tests that all bits are used to decide if "round to nearest even" should be used, in the example of rounding towards 0. * TST: Test not just denormals but all positive finite float16s * DOC: Add lots of comments and add a short release note.
author: Sebastian Berg <sebastian@sipsolutions.net> 2019-01-24 18:22:47 +0100
committer: Charles Harris <charlesr.harris@gmail.com> 2019-01-24 10:22:47 -0700
commit: 7e3d558aeee5a8a5eae5ebb6aef03de892a92ebd (patch)
tree: a7acbb60f94d2a4365472e24f5c4460980f69c45
parent: a5cace49b79690795df29c6da3587e7a45a41591 (diff)
download: numpy-7e3d558aeee5a8a5eae5ebb6aef03de892a92ebd.tar.gz
3 files changed, 110 insertions, 7 deletions
diff --git a/doc/release/1.17.0-notes.rst b/doc/release/1.17.0-notes.rst
index 51e195437..d15936cb6 100644
--- a/doc/release/1.17.0-notes.rst
+++ b/doc/release/1.17.0-notes.rst
@@ -30,6 +30,13 @@ Expired deprecations
 Compatibility notes
 ===================
 
+float16 subnormal rounding
+--------------------------
+Casting from a different floating point precision to float16 used incorrect
+rounding in some edge cases. This means in rare cases, subnormal results will
+now be rounded up instead of down, changing the last bit (ULP) of the result.
+
+
 
 C API changes
 =============
diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c
index c2bd28d60..84af86009 100644
--- a/numpy/core/src/npymath/halffloat.c
+++ b/numpy/core/src/npymath/halffloat.c
@@ -301,15 +301,23 @@ npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
             npy_set_floatstatus_underflow();
         }
 #endif
+        /*
+         * Usually the significand is shifted by 13. For subnormals an
+         * additional shift needs to occur. This shift is one for the largest
+         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
+         * offsets the new first bit. At most the shift can be 1+10 bits.
+         */
         f_sig >>= (113 - f_exp);
         /* Handle rounding by adding 1 to the bit beyond half precision */
 #if NPY_HALF_ROUND_TIES_TO_EVEN
         /*
          * If the last bit in the half significand is 0 (already even), and
          * the remaining bit pattern is 1000...0, then we do not add one
-         * to the bit after the half significand.  In all other cases, we do.
+         * to the bit after the half significand. However, the (113 - f_exp)
+         * shift can lose up to 11 bits, so the || checks them in the original.
+         * In all other cases, we can just add one.
          */
-        if ((f_sig&0x00003fffu) != 0x00001000u) {
+        if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
             f_sig += 0x00001000u;
         }
 #else
@@ -416,7 +424,16 @@ npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
             npy_set_floatstatus_underflow();
         }
 #endif
-        d_sig >>= (1009 - d_exp);
+        /*
+         * Unlike floats, doubles have enough room to shift left to align
+         * the subnormal significand leading to no loss of the last bits.
+         * The smallest possible exponent giving a subnormal is:
+         * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
+         * shifted with respect to it. This adds a shift of 10+1 bits the final
+         * right shift when comparing it to the one in the normal branch.
+         */
+        assert(d_exp - 998 >= 0);
+        d_sig <<= (d_exp - 998);
         /* Handle rounding by adding 1 to the bit beyond half precision */
 #if NPY_HALF_ROUND_TIES_TO_EVEN
         /*
@@ -424,13 +441,13 @@ npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
          * the remaining bit pattern is 1000...0, then we do not add one
          * to the bit after the half significand.  In all other cases, we do.
          */
-        if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
-            d_sig += 0x0000020000000000ULL;
+        if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
+            d_sig += 0x0010000000000000ULL;
         }
 #else
-        d_sig += 0x0000020000000000ULL;
+        d_sig += 0x0010000000000000ULL;
 #endif
-        h_sig = (npy_uint16) (d_sig >> 42);
+        h_sig = (npy_uint16) (d_sig >> 53);
         /*
          * If the rounding causes a bit to spill into h_exp, it will
          * increment h_exp from zero to one and h_sig will be zero.
diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index b28c933db..770712501 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -69,6 +69,85 @@ class TestHalf(object):
         j = np.array(i_f16, dtype=int)
         assert_equal(i_int, j)
 
+    @pytest.mark.parametrize("offset", [None, "up", "down"])
+    @pytest.mark.parametrize("shift", [None, "up", "down"])
+    @pytest.mark.parametrize("float_t", [np.float32, np.float64])
+    def test_half_conversion_rounding(self, float_t, shift, offset):
+        # Assumes that round to even is used during casting.
+        max_pattern = np.float16(np.finfo(np.float16).max).view(np.uint16)
+
+        # Test all (positive) finite numbers, denormals are most interesting
+        # however:
+        f16s_patterns = np.arange(0, max_pattern+1, dtype=np.uint16)
+        f16s_float = f16s_patterns.view(np.float16).astype(float_t)
+
+        # Shift the values by half a bit up or a down (or do not shift),
+        if shift == "up":
+            f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[1:]
+        elif shift == "down":
+            f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[:-1]
+        else:
+            f16s_float = f16s_float[1:-1]
+
+        # Increase the float by a minimal value:
+        if offset == "up":
+            f16s_float = np.nextafter(f16s_float, float_t(1e50))
+        elif offset == "down":
+            f16s_float = np.nextafter(f16s_float, float_t(-1e50))
+
+        # Convert back to float16 and its bit pattern:
+        res_patterns = f16s_float.astype(np.float16).view(np.uint16)
+
+        # The above calculations tries the original values, or the exact
+        # mid points between the float16 values. It then further offsets them
+        # by as little as possible. If no offset occurs, "round to even"
+        # logic will be necessary, an arbitrarily small offset should cause
+        # normal up/down rounding always.
+
+        # Calculate the expecte pattern:
+        cmp_patterns = f16s_patterns[1:-1].copy()
+
+        if shift == "down" and offset != "up":
+            shift_pattern = -1
+        elif shift == "up" and offset != "down":
+            shift_pattern = 1
+        else:
+            # There cannot be a shift, either shift is None, so all rounding
+            # will go back to original, or shift is reduced by offset too much.
+            shift_pattern = 0
+
+        # If rounding occurs, is it normal rounding or round to even?
+        if offset is None:
+            # Round to even occurs, modify only non-even, cast to allow + (-1)
+            cmp_patterns[0::2].view(np.int16)[...] += shift_pattern
+        else:
+            cmp_patterns.view(np.int16)[...] += shift_pattern
+
+        assert_equal(res_patterns, cmp_patterns)
+
+    @pytest.mark.parametrize(["float_t", "uint_t", "bits"],
+                             [(np.float32, np.uint32, 23),
+                              (np.float64, np.uint64, 52)])
+    def test_half_conversion_denormal_round_even(self, float_t, uint_t, bits):
+        # Test specifically that all bits are considered when deciding
+        # whether round to even should occur (i.e. no bits are lost at the
+        # end. Compare also gh-12721. The most bits can get lost for the
+        # smallest denormal:
+        smallest_value = np.uint16(1).view(np.float16).astype(float_t)
+        assert smallest_value == 2**-24
+
+        # Will be rounded to zero based on round to even rule:
+        rounded_to_zero = smallest_value / float_t(2)
+        assert rounded_to_zero.astype(np.float16) == 0
+
+        # The significand will be all 0 for the float_t, test that we do not
+        # lose the lower ones of these:
+        for i in range(bits):
+            # slightly increasing the value should make it round up:
+            larger_pattern = rounded_to_zero.view(uint_t) | uint_t(1 << i)
+            larger_value = larger_pattern.view(float_t)
+            assert larger_value.astype(np.float16) == smallest_value
+
     def test_nans_infs(self):
         with np.errstate(all='ignore'):
             # Check some of the ufuncs
author	Sebastian Berg <sebastian@sipsolutions.net>	2019-01-24 18:22:47 +0100
committer	Charles Harris <charlesr.harris@gmail.com>	2019-01-24 10:22:47 -0700
commit	7e3d558aeee5a8a5eae5ebb6aef03de892a92ebd (patch)
tree	a7acbb60f94d2a4365472e24f5c4460980f69c45
parent	a5cace49b79690795df29c6da3587e7a45a41591 (diff)
download	numpy-7e3d558aeee5a8a5eae5ebb6aef03de892a92ebd.tar.gz