diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/setup_common.py | 12 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dragon4.c | 251 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dragon4.h | 4 | ||||
-rw-r--r-- | numpy/core/src/npymath/npy_math_private.h | 4 | ||||
-rw-r--r-- | numpy/core/src/private/npy_fpmath.h | 12 | ||||
-rw-r--r-- | numpy/core/tests/test_scalarprint.py | 66 |
6 files changed, 335 insertions, 14 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 450fb1b9d..70a43046c 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -335,9 +335,9 @@ _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171', _IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000', '000', '000', '000', '000', '000', '000', '000', '000'] _IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1] -_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + +_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] + ['000'] * 8) -_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + +_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] + ['000'] * 8) def long_double_representation(lines): @@ -381,10 +381,10 @@ def long_double_representation(lines): return 'IEEE_QUAD_BE' elif read[8:-8] == _IEEE_QUAD_PREC_LE: return 'IEEE_QUAD_LE' - elif read[8:-8] == _DOUBLE_DOUBLE_BE: - return 'DOUBLE_DOUBLE_BE' - elif read[8:-8] == _DOUBLE_DOUBLE_LE: - return 'DOUBLE_DOUBLE_LE' + elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE: + return 'IBM_DOUBLE_DOUBLE_LE' + elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE: + return 'IBM_DOUBLE_DOUBLE_BE' # if the content was 8 bytes, left with 32-8-8 = 16 bytes elif read[:16] == _BEFORE_SEQ: if read[16:-8] == _IEEE_DOUBLE_LE: diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c index 8f7dc0e2a..c14653ac5 100644 --- a/numpy/core/src/multiarray/dragon4.c +++ b/numpy/core/src/multiarray/dragon4.c @@ -215,7 +215,9 @@ BigInt_Set_uint64(BigInt *i, npy_uint64 val) } } -#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) +#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ + defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \ + defined(HAVE_LDOUBLE_IEEE_QUAD_LE)) static void BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo) { @@ -247,7 +249,7 @@ BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo) i->blocks[0] = lo & bitmask_u64(32); } } -#endif /* QUAD */ +#endif /* DOUBLE_DOUBLE and QUAD */ static void BigInt_Set_uint32(BigInt *i, npy_uint32 val) @@ -2810,6 +2812,251 @@ Dragon4_PrintFloat_IEEE_binary128( } #endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */ +#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ + defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)) +/* + * IBM extended precision 128-bit floating-point format, aka IBM double-dobule + * + * IBM's double-double type is a pair of IEEE binary64 values, which you add + * together to get a total value. The exponents are arranged so that the lower + * double is about 2^52 times smaller than the high one, and the nearest + * float64 value is simply the upper double, in which case the pair is + * considered "normalized" (not to confuse with "normal" and "subnormal" + * binary64 values). We assume normalized values. You can see the glibc's + * printf on ppc does so too by constructing un-normalized values to get + * strange behavior from the OS printf: + * + * >>> from numpy.core._multiarray_tests import format_float_OSprintf_g + * >>> x = np.array([0.3,0.3], dtype='f8').view('f16')[0] + * >>> format_float_OSprintf_g(x, 2) + * 0.30 + * >>> format_float_OSprintf_g(2*x, 2) + * 1.20 + * + * If we don't assume normalization, x should really print as 0.6. + * + * For normalized values gcc assumes that the total mantissa is no + * more than 106 bits (53+53), so we can drop bits from the second double which + * would be pushed past 106 when left-shifting by its exponent, as happens + * sometimes. (There has been debate about this, see + * https://gcc.gnu.org/bugzilla/show_bug.cgi?format=multiple&id=70117, + * https://sourceware.org/bugzilla/show_bug.cgi?id=22752 ) + * + * Note: This function is for the IBM-double-double which is a pair of IEEE + * binary64 floats, like on ppc64 systems. This is *not* the hexadecimal + * IBM-double-double type, which is a pair of IBM hexadecimal64 floats. + * + * See also: + * https://gcc.gnu.org/wiki/Ieee128PowerPCA + * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm + */ +static npy_uint32 +Dragon4_PrintFloat_IBM_double_double( + Dragon4_Scratch *scratch, FloatVal128 val128, Dragon4_Options *opt) +{ + char *buffer = scratch->repr; + npy_uint32 bufferSize = sizeof(scratch->repr); + BigInt *bigints = scratch->bigints; + + npy_uint32 floatExponent1, floatExponent2; + npy_uint64 floatMantissa1, floatMantissa2; + npy_uint32 floatSign1, floatSign2; + + npy_uint64 mantissa1, mantissa2; + npy_int32 exponent1, exponent2; + int shift; + npy_uint32 mantissaBit; + npy_bool hasUnequalMargins; + char signbit = '\0'; + + if (bufferSize == 0) { + return 0; + } + + if (bufferSize == 1) { + buffer[0] = '\0'; + return 0; + } + + /* deconstruct the floating point values */ + floatMantissa1 = val128.hi & bitmask_u64(52); + floatExponent1 = (val128.hi >> 52) & bitmask_u32(11); + floatSign1 = (val128.hi >> 63) != 0; + + floatMantissa2 = val128.lo & bitmask_u64(52); + floatExponent2 = (val128.lo >> 52) & bitmask_u32(11); + floatSign2 = (val128.lo >> 63) != 0; + + /* output the sign using 1st float's sign */ + if (floatSign1) { + signbit = '-'; + } + else if (opt->sign) { + signbit = '+'; + } + + /* we only need to look at the first float for inf/nan */ + if (floatExponent1 == bitmask_u32(11)) { + return PrintInfNan(buffer, bufferSize, floatMantissa1, 13, signbit); + } + + /* else this is a number */ + + /* Factor the 1st value into its parts, see binary64 for comments. */ + if (floatExponent1 == 0) { + /* + * If the first number is a subnormal value, the 2nd has to be 0 for + * the float128 to be normalized, so we can ignore it. In this case + * the float128 only has the precision of a single binary64 value. + */ + mantissa1 = floatMantissa1; + exponent1 = 1 - 1023 - 52; + mantissaBit = LogBase2_64(mantissa1); + hasUnequalMargins = NPY_FALSE; + + BigInt_Set_uint64(&bigints[0], mantissa1); + } + else { + mantissa1 = (1ull << 52) | floatMantissa1; + exponent1 = floatExponent1 - 1023 - 52; + mantissaBit = 52 + 53; + + /* + * Computing hasUnequalMargins and mantissaBit: + * This is a little trickier than for IEEE formats. + * + * When both doubles are "normal" it is clearer since we can think of + * it as an IEEE type with a 106 bit mantissa. This value can never + * have "unequal" margins because of the implied 1 bit in the 2nd + * value. (unequal margins only happen when the mantissa has a value + * like "10000000000...", all zeros except the implied bit at the + * start, since the next lowest number has a different exponent). + * mantissaBits will always be 52+53 in this case. + * + * If the 1st number is a very small normal, and the 2nd is subnormal + * and not 2^52 times smaller, the number behaves like a subnormal + * overall, where the upper number just adds some bits on the left. + * Like usual subnormals, it has "equal" margins. The slightly tricky + * thing is that the number of mantissaBits varies. It will be 52 + * (from lower double) plus a variable number depending on the upper + * number's exponent. We recompute the number of bits in the shift + * calculation below, because the shift will be equal to the number of + * lost bits. + * + * We can get unequal margins only if the first value has all-0 + * mantissa (except implied bit), and the second value is exactly 0. As + * a special exception the smallest normal value (smallest exponent, 0 + * mantissa) should have equal margins, since it is "next to" a + * subnormal value. + */ + + /* factor the 2nd value into its parts */ + if (floatExponent2 != 0) { + mantissa2 = (1ull << 52) | floatMantissa2; + exponent2 = floatExponent2 - 1023 - 52; + hasUnequalMargins = NPY_FALSE; + } + else { + /* shift exp by one so that leading mantissa bit is still bit 53 */ + mantissa2 = floatMantissa2 << 1; + exponent2 = - 1023 - 52; + hasUnequalMargins = (floatExponent1 != 1) && (floatMantissa1 == 0) + && (floatMantissa2 == 0); + } + + /* + * The 2nd val's exponent might not be exactly 52 smaller than the 1st, + * it can vary a little bit. So do some shifting of the low mantissa, + * so that the total mantissa is equivalent to bits 53 to 0 of the + * first double immediately followed by bits 53 to 0 of the second. + */ + shift = exponent1 - exponent2 - 53; + if (shift > 0) { + /* shift more than 64 is undefined behavior */ + mantissa2 = shift < 64 ? mantissa2 >> shift : 0; + } + else if (shift < 0) { + /* + * This only happens if the 2nd value is subnormal. + * We expect that shift > -64, but check it anyway + */ + mantissa2 = -shift < 64 ? mantissa2 << -shift : 0; + } + + /* + * If the low double is a different sign from the high double, + * rearrange so that the total mantissa is the sum of the two + * mantissas, instead of a subtraction. + * hi - lo -> (hi-1) + (1-lo), where lo < 1 + */ + if (floatSign1 != floatSign2 && mantissa2 != 0) { + mantissa1--; + mantissa2 = (1ull << 53) - mantissa2; + } + + /* + * Compute the number of bits if we are in the subnormal range. + * The value "shift" happens to be exactly the number of lost bits. + * Also, shift the bits so that the least significant bit is at + * bit position 0, like a typical subnormal. After this exponent1 + * should always be 2^-1022 + */ + if (shift < 0) { + mantissa2 = (mantissa2 >> -shift) | (mantissa1 << (53 + shift)); + mantissa1 = mantissa1 >> -shift; + mantissaBit = mantissaBit -(-shift); + exponent1 -= shift; + DEBUG_ASSERT(exponent1 == -1022); + } + + /* + * set up the BigInt mantissa, by shifting the parts as needed + * We can use | instead of + since the mantissas should not overlap + */ + BigInt_Set_2x_uint64(&bigints[0], mantissa1 >> 11, + (mantissa1 << 53) | (mantissa2)); + exponent1 = exponent1 - 53; + } + + return Format_floatbits(buffer, bufferSize, bigints, exponent1, + signbit, mantissaBit, hasUnequalMargins, opt); +} + +#if defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) +static npy_uint32 +Dragon4_PrintFloat_IBM_double_double_le( + Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) +{ + FloatVal128 val128; + FloatUnion128 buf128; + + buf128.floatingPoint = *value; + val128.lo = buf128.integer.a; + val128.hi = buf128.integer.b; + + return Dragon4_PrintFloat_IBM_double_double(scratch, val128, opt); +} +#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE */ + +#if defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) +static npy_uint32 +Dragon4_PrintFloat_IBM_double_double_be( + Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) +{ + FloatVal128 val128; + FloatUnion128 buf128; + + buf128.floatingPoint = *value; + val128.hi = buf128.integer.a; + val128.lo = buf128.integer.b; + + return Dragon4_PrintFloat_IBM_double_double(scratch, val128, opt); +} + +#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */ + +#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE | HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */ + #endif /* NPY_FLOAT128 */ diff --git a/numpy/core/src/multiarray/dragon4.h b/numpy/core/src/multiarray/dragon4.h index 176b79f69..383a0949d 100644 --- a/numpy/core/src/multiarray/dragon4.h +++ b/numpy/core/src/multiarray/dragon4.h @@ -75,6 +75,10 @@ #define NPY_LONGDOUBLE_BINFMT_NAME Intel_extended128 #elif defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) #define NPY_LONGDOUBLE_BINFMT_NAME Motorola_extended96 +#elif defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) + #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double_le +#elif defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) + #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double_be #else #error No long double representation defined #endif diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h index 7ee564239..e4a919db6 100644 --- a/numpy/core/src/npymath/npy_math_private.h +++ b/numpy/core/src/npymath/npy_math_private.h @@ -434,8 +434,8 @@ do { \ typedef npy_uint32 ldouble_sign_t; #endif -#if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \ - !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) +#if !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) && \ + !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) /* Get the sign bit of x. x should be of type IEEEl2bitsrep */ #define GET_LDOUBLE_SIGN(x) \ (((x).a[LDBL_SIGN_INDEX] & LDBL_SIGN_MASK) >> LDBL_SIGN_SHIFT) diff --git a/numpy/core/src/private/npy_fpmath.h b/numpy/core/src/private/npy_fpmath.h index e1521de3b..dbb3fb23d 100644 --- a/numpy/core/src/private/npy_fpmath.h +++ b/numpy/core/src/private/npy_fpmath.h @@ -14,9 +14,17 @@ defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \ defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \ defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) || \ - defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \ - defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE)) + defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \ + defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)) #error No long double representation defined #endif +/* for back-compat, also keep old name for double-double */ +#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE + #define HAVE_LDOUBLE_DOUBLE_DOUBLE_LE +#endif +#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE + #define HAVE_LDOUBLE_DOUBLE_DOUBLE_BE +#endif + #endif diff --git a/numpy/core/tests/test_scalarprint.py b/numpy/core/tests/test_scalarprint.py index a20ec9f74..472ff691d 100644 --- a/numpy/core/tests/test_scalarprint.py +++ b/numpy/core/tests/test_scalarprint.py @@ -5,10 +5,12 @@ from __future__ import division, absolute_import, print_function import code, sys +import platform +import pytest + from tempfile import TemporaryFile import numpy as np -from numpy.testing import assert_, assert_equal, suppress_warnings - +from numpy.testing import assert_, assert_equal, suppress_warnings, dec class TestRealScalars(object): def test_str(self): @@ -250,6 +252,66 @@ class TestRealScalars(object): "1.2" if tp != np.float16 else "1.2002") assert_equal(fpos(tp('1.'), trim='-'), "1") + @pytest.mark.skipif(not platform.machine().startswith("ppc64"), + reason="only applies to ppc float128 values") + def test_ppc64_ibm_double_double128(self): + # check that the precision decreases once we get into the subnormal + # range. Unlike float64, this starts around 1e-292 instead of 1e-308, + # which happens when the first double is normal and the second is + # subnormal. + x = np.float128('2.123123123123123123123123123123123e-286') + got = [str(x/np.float128('2e' + str(i))) for i in range(0,40)] + expected = [ + "1.06156156156156156156156156156157e-286", + "1.06156156156156156156156156156158e-287", + "1.06156156156156156156156156156159e-288", + "1.0615615615615615615615615615616e-289", + "1.06156156156156156156156156156157e-290", + "1.06156156156156156156156156156156e-291", + "1.0615615615615615615615615615616e-292", + "1.0615615615615615615615615615615e-293", + "1.061561561561561561561561561562e-294", + "1.06156156156156156156156156155e-295", + "1.0615615615615615615615615616e-296", + "1.06156156156156156156156156e-297", + "1.06156156156156156156156157e-298", + "1.0615615615615615615615616e-299", + "1.06156156156156156156156e-300", + "1.06156156156156156156155e-301", + "1.0615615615615615615616e-302", + "1.061561561561561561562e-303", + "1.06156156156156156156e-304", + "1.0615615615615615618e-305", + "1.06156156156156156e-306", + "1.06156156156156157e-307", + "1.0615615615615616e-308", + "1.06156156156156e-309", + "1.06156156156157e-310", + "1.0615615615616e-311", + "1.06156156156e-312", + "1.06156156154e-313", + "1.0615615616e-314", + "1.06156156e-315", + "1.06156155e-316", + "1.061562e-317", + "1.06156e-318", + "1.06155e-319", + "1.0617e-320", + "1.06e-321", + "1.04e-322", + "1e-323", + "0.0", + "0.0"] + assert_equal(got, expected) + + # Note: we follow glibc behavior, but it (or gcc) might not be right. + # In particular we can get two values that print the same but are not + # equal: + a = np.float128('2')/np.float128('3') + b = np.float128(str(a)) + assert_equal(str(a), str(b)) + assert_(a != b) + def float32_roundtrip(self): # gh-9360 x = np.float32(1024 - 2**-14) |