summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/setup_common.py12
-rw-r--r--numpy/core/src/multiarray/dragon4.c251
-rw-r--r--numpy/core/src/multiarray/dragon4.h4
-rw-r--r--numpy/core/src/npymath/npy_math_private.h4
-rw-r--r--numpy/core/src/private/npy_fpmath.h12
-rw-r--r--numpy/core/tests/test_scalarprint.py66
6 files changed, 335 insertions, 14 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 450fb1b9d..70a43046c 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -335,9 +335,9 @@ _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171',
_IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000',
'000', '000', '000', '000', '000', '000', '000', '000']
_IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1]
-_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
+_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
['000'] * 8)
-_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
+_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
['000'] * 8)
def long_double_representation(lines):
@@ -381,10 +381,10 @@ def long_double_representation(lines):
return 'IEEE_QUAD_BE'
elif read[8:-8] == _IEEE_QUAD_PREC_LE:
return 'IEEE_QUAD_LE'
- elif read[8:-8] == _DOUBLE_DOUBLE_BE:
- return 'DOUBLE_DOUBLE_BE'
- elif read[8:-8] == _DOUBLE_DOUBLE_LE:
- return 'DOUBLE_DOUBLE_LE'
+ elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE:
+ return 'IBM_DOUBLE_DOUBLE_LE'
+ elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE:
+ return 'IBM_DOUBLE_DOUBLE_BE'
# if the content was 8 bytes, left with 32-8-8 = 16 bytes
elif read[:16] == _BEFORE_SEQ:
if read[16:-8] == _IEEE_DOUBLE_LE:
diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c
index 8f7dc0e2a..c14653ac5 100644
--- a/numpy/core/src/multiarray/dragon4.c
+++ b/numpy/core/src/multiarray/dragon4.c
@@ -215,7 +215,9 @@ BigInt_Set_uint64(BigInt *i, npy_uint64 val)
}
}
-#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE)
+#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \
+ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \
+ defined(HAVE_LDOUBLE_IEEE_QUAD_LE))
static void
BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo)
{
@@ -247,7 +249,7 @@ BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo)
i->blocks[0] = lo & bitmask_u64(32);
}
}
-#endif /* QUAD */
+#endif /* DOUBLE_DOUBLE and QUAD */
static void
BigInt_Set_uint32(BigInt *i, npy_uint32 val)
@@ -2810,6 +2812,251 @@ Dragon4_PrintFloat_IEEE_binary128(
}
#endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */
+#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \
+ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE))
+/*
+ * IBM extended precision 128-bit floating-point format, aka IBM double-dobule
+ *
+ * IBM's double-double type is a pair of IEEE binary64 values, which you add
+ * together to get a total value. The exponents are arranged so that the lower
+ * double is about 2^52 times smaller than the high one, and the nearest
+ * float64 value is simply the upper double, in which case the pair is
+ * considered "normalized" (not to confuse with "normal" and "subnormal"
+ * binary64 values). We assume normalized values. You can see the glibc's
+ * printf on ppc does so too by constructing un-normalized values to get
+ * strange behavior from the OS printf:
+ *
+ * >>> from numpy.core._multiarray_tests import format_float_OSprintf_g
+ * >>> x = np.array([0.3,0.3], dtype='f8').view('f16')[0]
+ * >>> format_float_OSprintf_g(x, 2)
+ * 0.30
+ * >>> format_float_OSprintf_g(2*x, 2)
+ * 1.20
+ *
+ * If we don't assume normalization, x should really print as 0.6.
+ *
+ * For normalized values gcc assumes that the total mantissa is no
+ * more than 106 bits (53+53), so we can drop bits from the second double which
+ * would be pushed past 106 when left-shifting by its exponent, as happens
+ * sometimes. (There has been debate about this, see
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?format=multiple&id=70117,
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=22752 )
+ *
+ * Note: This function is for the IBM-double-double which is a pair of IEEE
+ * binary64 floats, like on ppc64 systems. This is *not* the hexadecimal
+ * IBM-double-double type, which is a pair of IBM hexadecimal64 floats.
+ *
+ * See also:
+ * https://gcc.gnu.org/wiki/Ieee128PowerPCA
+ * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm
+ */
+static npy_uint32
+Dragon4_PrintFloat_IBM_double_double(
+ Dragon4_Scratch *scratch, FloatVal128 val128, Dragon4_Options *opt)
+{
+ char *buffer = scratch->repr;
+ npy_uint32 bufferSize = sizeof(scratch->repr);
+ BigInt *bigints = scratch->bigints;
+
+ npy_uint32 floatExponent1, floatExponent2;
+ npy_uint64 floatMantissa1, floatMantissa2;
+ npy_uint32 floatSign1, floatSign2;
+
+ npy_uint64 mantissa1, mantissa2;
+ npy_int32 exponent1, exponent2;
+ int shift;
+ npy_uint32 mantissaBit;
+ npy_bool hasUnequalMargins;
+ char signbit = '\0';
+
+ if (bufferSize == 0) {
+ return 0;
+ }
+
+ if (bufferSize == 1) {
+ buffer[0] = '\0';
+ return 0;
+ }
+
+ /* deconstruct the floating point values */
+ floatMantissa1 = val128.hi & bitmask_u64(52);
+ floatExponent1 = (val128.hi >> 52) & bitmask_u32(11);
+ floatSign1 = (val128.hi >> 63) != 0;
+
+ floatMantissa2 = val128.lo & bitmask_u64(52);
+ floatExponent2 = (val128.lo >> 52) & bitmask_u32(11);
+ floatSign2 = (val128.lo >> 63) != 0;
+
+ /* output the sign using 1st float's sign */
+ if (floatSign1) {
+ signbit = '-';
+ }
+ else if (opt->sign) {
+ signbit = '+';
+ }
+
+ /* we only need to look at the first float for inf/nan */
+ if (floatExponent1 == bitmask_u32(11)) {
+ return PrintInfNan(buffer, bufferSize, floatMantissa1, 13, signbit);
+ }
+
+ /* else this is a number */
+
+ /* Factor the 1st value into its parts, see binary64 for comments. */
+ if (floatExponent1 == 0) {
+ /*
+ * If the first number is a subnormal value, the 2nd has to be 0 for
+ * the float128 to be normalized, so we can ignore it. In this case
+ * the float128 only has the precision of a single binary64 value.
+ */
+ mantissa1 = floatMantissa1;
+ exponent1 = 1 - 1023 - 52;
+ mantissaBit = LogBase2_64(mantissa1);
+ hasUnequalMargins = NPY_FALSE;
+
+ BigInt_Set_uint64(&bigints[0], mantissa1);
+ }
+ else {
+ mantissa1 = (1ull << 52) | floatMantissa1;
+ exponent1 = floatExponent1 - 1023 - 52;
+ mantissaBit = 52 + 53;
+
+ /*
+ * Computing hasUnequalMargins and mantissaBit:
+ * This is a little trickier than for IEEE formats.
+ *
+ * When both doubles are "normal" it is clearer since we can think of
+ * it as an IEEE type with a 106 bit mantissa. This value can never
+ * have "unequal" margins because of the implied 1 bit in the 2nd
+ * value. (unequal margins only happen when the mantissa has a value
+ * like "10000000000...", all zeros except the implied bit at the
+ * start, since the next lowest number has a different exponent).
+ * mantissaBits will always be 52+53 in this case.
+ *
+ * If the 1st number is a very small normal, and the 2nd is subnormal
+ * and not 2^52 times smaller, the number behaves like a subnormal
+ * overall, where the upper number just adds some bits on the left.
+ * Like usual subnormals, it has "equal" margins. The slightly tricky
+ * thing is that the number of mantissaBits varies. It will be 52
+ * (from lower double) plus a variable number depending on the upper
+ * number's exponent. We recompute the number of bits in the shift
+ * calculation below, because the shift will be equal to the number of
+ * lost bits.
+ *
+ * We can get unequal margins only if the first value has all-0
+ * mantissa (except implied bit), and the second value is exactly 0. As
+ * a special exception the smallest normal value (smallest exponent, 0
+ * mantissa) should have equal margins, since it is "next to" a
+ * subnormal value.
+ */
+
+ /* factor the 2nd value into its parts */
+ if (floatExponent2 != 0) {
+ mantissa2 = (1ull << 52) | floatMantissa2;
+ exponent2 = floatExponent2 - 1023 - 52;
+ hasUnequalMargins = NPY_FALSE;
+ }
+ else {
+ /* shift exp by one so that leading mantissa bit is still bit 53 */
+ mantissa2 = floatMantissa2 << 1;
+ exponent2 = - 1023 - 52;
+ hasUnequalMargins = (floatExponent1 != 1) && (floatMantissa1 == 0)
+ && (floatMantissa2 == 0);
+ }
+
+ /*
+ * The 2nd val's exponent might not be exactly 52 smaller than the 1st,
+ * it can vary a little bit. So do some shifting of the low mantissa,
+ * so that the total mantissa is equivalent to bits 53 to 0 of the
+ * first double immediately followed by bits 53 to 0 of the second.
+ */
+ shift = exponent1 - exponent2 - 53;
+ if (shift > 0) {
+ /* shift more than 64 is undefined behavior */
+ mantissa2 = shift < 64 ? mantissa2 >> shift : 0;
+ }
+ else if (shift < 0) {
+ /*
+ * This only happens if the 2nd value is subnormal.
+ * We expect that shift > -64, but check it anyway
+ */
+ mantissa2 = -shift < 64 ? mantissa2 << -shift : 0;
+ }
+
+ /*
+ * If the low double is a different sign from the high double,
+ * rearrange so that the total mantissa is the sum of the two
+ * mantissas, instead of a subtraction.
+ * hi - lo -> (hi-1) + (1-lo), where lo < 1
+ */
+ if (floatSign1 != floatSign2 && mantissa2 != 0) {
+ mantissa1--;
+ mantissa2 = (1ull << 53) - mantissa2;
+ }
+
+ /*
+ * Compute the number of bits if we are in the subnormal range.
+ * The value "shift" happens to be exactly the number of lost bits.
+ * Also, shift the bits so that the least significant bit is at
+ * bit position 0, like a typical subnormal. After this exponent1
+ * should always be 2^-1022
+ */
+ if (shift < 0) {
+ mantissa2 = (mantissa2 >> -shift) | (mantissa1 << (53 + shift));
+ mantissa1 = mantissa1 >> -shift;
+ mantissaBit = mantissaBit -(-shift);
+ exponent1 -= shift;
+ DEBUG_ASSERT(exponent1 == -1022);
+ }
+
+ /*
+ * set up the BigInt mantissa, by shifting the parts as needed
+ * We can use | instead of + since the mantissas should not overlap
+ */
+ BigInt_Set_2x_uint64(&bigints[0], mantissa1 >> 11,
+ (mantissa1 << 53) | (mantissa2));
+ exponent1 = exponent1 - 53;
+ }
+
+ return Format_floatbits(buffer, bufferSize, bigints, exponent1,
+ signbit, mantissaBit, hasUnequalMargins, opt);
+}
+
+#if defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)
+static npy_uint32
+Dragon4_PrintFloat_IBM_double_double_le(
+ Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt)
+{
+ FloatVal128 val128;
+ FloatUnion128 buf128;
+
+ buf128.floatingPoint = *value;
+ val128.lo = buf128.integer.a;
+ val128.hi = buf128.integer.b;
+
+ return Dragon4_PrintFloat_IBM_double_double(scratch, val128, opt);
+}
+#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE */
+
+#if defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)
+static npy_uint32
+Dragon4_PrintFloat_IBM_double_double_be(
+ Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt)
+{
+ FloatVal128 val128;
+ FloatUnion128 buf128;
+
+ buf128.floatingPoint = *value;
+ val128.hi = buf128.integer.a;
+ val128.lo = buf128.integer.b;
+
+ return Dragon4_PrintFloat_IBM_double_double(scratch, val128, opt);
+}
+
+#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */
+
+#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE | HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */
+
#endif /* NPY_FLOAT128 */
diff --git a/numpy/core/src/multiarray/dragon4.h b/numpy/core/src/multiarray/dragon4.h
index 176b79f69..383a0949d 100644
--- a/numpy/core/src/multiarray/dragon4.h
+++ b/numpy/core/src/multiarray/dragon4.h
@@ -75,6 +75,10 @@
#define NPY_LONGDOUBLE_BINFMT_NAME Intel_extended128
#elif defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE)
#define NPY_LONGDOUBLE_BINFMT_NAME Motorola_extended96
+#elif defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)
+ #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double_le
+#elif defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)
+ #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double_be
#else
#error No long double representation defined
#endif
diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h
index 7ee564239..e4a919db6 100644
--- a/numpy/core/src/npymath/npy_math_private.h
+++ b/numpy/core/src/npymath/npy_math_private.h
@@ -434,8 +434,8 @@ do { \
typedef npy_uint32 ldouble_sign_t;
#endif
-#if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \
- !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE)
+#if !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) && \
+ !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)
/* Get the sign bit of x. x should be of type IEEEl2bitsrep */
#define GET_LDOUBLE_SIGN(x) \
(((x).a[LDBL_SIGN_INDEX] & LDBL_SIGN_MASK) >> LDBL_SIGN_SHIFT)
diff --git a/numpy/core/src/private/npy_fpmath.h b/numpy/core/src/private/npy_fpmath.h
index e1521de3b..dbb3fb23d 100644
--- a/numpy/core/src/private/npy_fpmath.h
+++ b/numpy/core/src/private/npy_fpmath.h
@@ -14,9 +14,17 @@
defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \
defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \
defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) || \
- defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \
- defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE))
+ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \
+ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE))
#error No long double representation defined
#endif
+/* for back-compat, also keep old name for double-double */
+#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE
+ #define HAVE_LDOUBLE_DOUBLE_DOUBLE_LE
+#endif
+#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE
+ #define HAVE_LDOUBLE_DOUBLE_DOUBLE_BE
+#endif
+
#endif
diff --git a/numpy/core/tests/test_scalarprint.py b/numpy/core/tests/test_scalarprint.py
index a20ec9f74..472ff691d 100644
--- a/numpy/core/tests/test_scalarprint.py
+++ b/numpy/core/tests/test_scalarprint.py
@@ -5,10 +5,12 @@
from __future__ import division, absolute_import, print_function
import code, sys
+import platform
+import pytest
+
from tempfile import TemporaryFile
import numpy as np
-from numpy.testing import assert_, assert_equal, suppress_warnings
-
+from numpy.testing import assert_, assert_equal, suppress_warnings, dec
class TestRealScalars(object):
def test_str(self):
@@ -250,6 +252,66 @@ class TestRealScalars(object):
"1.2" if tp != np.float16 else "1.2002")
assert_equal(fpos(tp('1.'), trim='-'), "1")
+ @pytest.mark.skipif(not platform.machine().startswith("ppc64"),
+ reason="only applies to ppc float128 values")
+ def test_ppc64_ibm_double_double128(self):
+ # check that the precision decreases once we get into the subnormal
+ # range. Unlike float64, this starts around 1e-292 instead of 1e-308,
+ # which happens when the first double is normal and the second is
+ # subnormal.
+ x = np.float128('2.123123123123123123123123123123123e-286')
+ got = [str(x/np.float128('2e' + str(i))) for i in range(0,40)]
+ expected = [
+ "1.06156156156156156156156156156157e-286",
+ "1.06156156156156156156156156156158e-287",
+ "1.06156156156156156156156156156159e-288",
+ "1.0615615615615615615615615615616e-289",
+ "1.06156156156156156156156156156157e-290",
+ "1.06156156156156156156156156156156e-291",
+ "1.0615615615615615615615615615616e-292",
+ "1.0615615615615615615615615615615e-293",
+ "1.061561561561561561561561561562e-294",
+ "1.06156156156156156156156156155e-295",
+ "1.0615615615615615615615615616e-296",
+ "1.06156156156156156156156156e-297",
+ "1.06156156156156156156156157e-298",
+ "1.0615615615615615615615616e-299",
+ "1.06156156156156156156156e-300",
+ "1.06156156156156156156155e-301",
+ "1.0615615615615615615616e-302",
+ "1.061561561561561561562e-303",
+ "1.06156156156156156156e-304",
+ "1.0615615615615615618e-305",
+ "1.06156156156156156e-306",
+ "1.06156156156156157e-307",
+ "1.0615615615615616e-308",
+ "1.06156156156156e-309",
+ "1.06156156156157e-310",
+ "1.0615615615616e-311",
+ "1.06156156156e-312",
+ "1.06156156154e-313",
+ "1.0615615616e-314",
+ "1.06156156e-315",
+ "1.06156155e-316",
+ "1.061562e-317",
+ "1.06156e-318",
+ "1.06155e-319",
+ "1.0617e-320",
+ "1.06e-321",
+ "1.04e-322",
+ "1e-323",
+ "0.0",
+ "0.0"]
+ assert_equal(got, expected)
+
+ # Note: we follow glibc behavior, but it (or gcc) might not be right.
+ # In particular we can get two values that print the same but are not
+ # equal:
+ a = np.float128('2')/np.float128('3')
+ b = np.float128(str(a))
+ assert_equal(str(a), str(b))
+ assert_(a != b)
+
def float32_roundtrip(self):
# gh-9360
x = np.float32(1024 - 2**-14)