6 files changed, 335 insertions, 14 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 450fb1b9d..70a43046c 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -335,9 +335,9 @@ _MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171',
 _IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000',
                       '000', '000', '000', '000', '000', '000', '000', '000']
 _IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1]
-_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
+_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
                      ['000'] * 8)
-_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
+_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
                      ['000'] * 8)
 
 def long_double_representation(lines):
@@ -381,10 +381,10 @@ def long_double_representation(lines):
                         return 'IEEE_QUAD_BE'
                     elif read[8:-8] == _IEEE_QUAD_PREC_LE:
                         return 'IEEE_QUAD_LE'
-                    elif read[8:-8] == _DOUBLE_DOUBLE_BE:
-                        return 'DOUBLE_DOUBLE_BE'
-                    elif read[8:-8] == _DOUBLE_DOUBLE_LE:
-                        return 'DOUBLE_DOUBLE_LE'
+                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE:
+                        return 'IBM_DOUBLE_DOUBLE_LE'
+                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE:
+                        return 'IBM_DOUBLE_DOUBLE_BE'
                 # if the content was 8 bytes, left with 32-8-8 = 16 bytes
                 elif read[:16] == _BEFORE_SEQ:
                     if read[16:-8] == _IEEE_DOUBLE_LE:
diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c
index 8f7dc0e2a..c14653ac5 100644
--- a/numpy/core/src/multiarray/dragon4.c
+++ b/numpy/core/src/multiarray/dragon4.c
@@ -215,7 +215,9 @@ BigInt_Set_uint64(BigInt *i, npy_uint64 val)
     }
 }
 
-#if defined(HAVE_LDOUBLE_IEEE_QUAD_LE)
+#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \
+     defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \
+     defined(HAVE_LDOUBLE_IEEE_QUAD_LE))
 static void
 BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo)
 {
@@ -247,7 +249,7 @@ BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo)
             i->blocks[0] = lo & bitmask_u64(32);
     }
 }
-#endif /* QUAD */
+#endif /* DOUBLE_DOUBLE and QUAD */
 
 static void
 BigInt_Set_uint32(BigInt *i, npy_uint32 val)
@@ -2810,6 +2812,251 @@ Dragon4_PrintFloat_IEEE_binary128(
 }
 #endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */
 
+#if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \
+     defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE))
+/*
+ * IBM extended precision 128-bit floating-point format, aka IBM double-dobule
+ *
+ * IBM's double-double type is a pair of IEEE binary64 values, which you add
+ * together to get a total value. The exponents are arranged so that the lower
+ * double is about 2^52 times smaller than the high one, and the nearest
+ * float64 value is simply the upper double, in which case the pair is
+ * considered "normalized" (not to confuse with "normal" and "subnormal"
+ * binary64 values). We assume normalized values. You can see the glibc's
+ * printf on ppc does so too by constructing un-normalized values to get
+ * strange behavior from the OS printf:
+ *
+ *     >>> from numpy.core._multiarray_tests import format_float_OSprintf_g
+ *     >>> x = np.array([0.3,0.3], dtype='f8').view('f16')[0]
+ *     >>> format_float_OSprintf_g(x, 2)
+ *     0.30
+ *     >>> format_float_OSprintf_g(2*x, 2)
+ *     1.20
+ *
+ * If we don't assume normalization, x should really print as 0.6.
+ *
+ * For normalized values gcc assumes that the total mantissa is no
+ * more than 106 bits (53+53), so we can drop bits from the second double which
+ * would be pushed past 106 when left-shifting by its exponent, as happens
+ * sometimes. (There has been debate about this, see
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?format=multiple&id=70117,
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=22752 )
+ *
+ * Note: This function is for the IBM-double-double which is a pair of IEEE
+ * binary64 floats, like on ppc64 systems. This is *not* the hexadecimal
+ * IBM-double-double type, which is a pair of IBM hexadecimal64 floats.
+ *
+ * See also:
+ * https://gcc.gnu.org/wiki/Ieee128PowerPCA
+ * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm
+ */
+static npy_uint32
+Dragon4_PrintFloat_IBM_double_double(
+    Dragon4_Scratch *scratch, FloatVal128 val128, Dragon4_Options *opt)
+{
+    char *buffer = scratch->repr;
+    npy_uint32 bufferSize = sizeof(scratch->repr);
+    BigInt *bigints = scratch->bigints;
+
+    npy_uint32 floatExponent1, floatExponent2;
+    npy_uint64 floatMantissa1, floatMantissa2;
+    npy_uint32 floatSign1, floatSign2;
+
+    npy_uint64 mantissa1, mantissa2;
+    npy_int32 exponent1, exponent2;
+    int shift;
+    npy_uint32 mantissaBit;
+    npy_bool hasUnequalMargins;
+    char signbit = '\0';
+
+    if (bufferSize == 0) {
+        return 0;
+    }
+
+    if (bufferSize == 1) {
+        buffer[0] = '\0';
+        return 0;
+    }
+
+    /* deconstruct the floating point values */
+    floatMantissa1 = val128.hi & bitmask_u64(52);
+    floatExponent1 = (val128.hi >> 52) & bitmask_u32(11);
+    floatSign1 = (val128.hi >> 63) != 0;
+
+    floatMantissa2 = val128.lo & bitmask_u64(52);
+    floatExponent2 = (val128.lo >> 52) & bitmask_u32(11);
+    floatSign2 = (val128.lo >> 63) != 0;
+
+    /* output the sign using 1st float's sign */
+    if (floatSign1) {
+        signbit = '-';
+    }
+    else if (opt->sign) {
+        signbit = '+';
+    }
+
+    /* we only need to look at the first float for inf/nan */
+    if (floatExponent1 == bitmask_u32(11)) {
+        return PrintInfNan(buffer, bufferSize, floatMantissa1, 13, signbit);
+    }
+
+    /* else this is a number */
+
+    /* Factor the 1st value into its parts, see binary64 for comments. */
+    if (floatExponent1 == 0) {
+        /*
+         * If the first number is a subnormal value, the 2nd has to be 0 for
+         * the float128 to be normalized, so we can ignore it. In this case
+         * the float128 only has the precision of a single binary64 value.
+         */
+        mantissa1            = floatMantissa1;
+        exponent1            = 1 - 1023 - 52;
+        mantissaBit          = LogBase2_64(mantissa1);
+        hasUnequalMargins    = NPY_FALSE;
+
+        BigInt_Set_uint64(&bigints[0], mantissa1);
+    }
+    else {
+        mantissa1            = (1ull << 52) | floatMantissa1;
+        exponent1            = floatExponent1 - 1023 - 52;
+        mantissaBit          = 52 + 53;
+
+        /*
+         * Computing hasUnequalMargins and mantissaBit:
+         * This is a little trickier than for IEEE formats.
+         *
+         * When both doubles are "normal" it is clearer since we can think of
+         * it as an IEEE type with a 106 bit mantissa. This value can never
+         * have "unequal" margins because of the implied 1 bit in the 2nd
+         * value.  (unequal margins only happen when the mantissa has a value
+         * like "10000000000...", all zeros except the implied bit at the
+         * start, since the next lowest number has a different exponent).
+         * mantissaBits will always be 52+53 in this case.
+         *
+         * If the 1st number is a very small normal, and the 2nd is subnormal
+         * and not 2^52 times smaller, the number behaves like a subnormal
+         * overall, where the upper number just adds some bits on the left.
+         * Like usual subnormals, it has "equal" margins. The slightly tricky
+         * thing is that the number of mantissaBits varies. It will be 52
+         * (from lower double) plus a variable number depending on the upper
+         * number's exponent. We recompute the number of bits in the shift
+         * calculation below, because the shift will be equal to the number of
+         * lost bits.
+         *
+         * We can get unequal margins only if the first value has all-0
+         * mantissa (except implied bit), and the second value is exactly 0. As
+         * a special exception the smallest normal value (smallest exponent, 0
+         * mantissa) should have equal margins, since it is "next to" a
+         * subnormal value.
+         */
+
+        /* factor the 2nd value into its parts */
+        if (floatExponent2 != 0) {
+            mantissa2            = (1ull << 52) | floatMantissa2;
+            exponent2            = floatExponent2 - 1023 - 52;
+            hasUnequalMargins    = NPY_FALSE;
+        }
+        else {
+            /* shift exp by one so that leading mantissa bit is still bit 53 */
+            mantissa2            = floatMantissa2 << 1;
+            exponent2            = - 1023 - 52;
+            hasUnequalMargins  = (floatExponent1 != 1) && (floatMantissa1 == 0)
+                                                       && (floatMantissa2 == 0);
+        }
+
+        /*
+         * The 2nd val's exponent might not be exactly 52 smaller than the 1st,
+         * it can vary a little bit. So do some shifting of the low mantissa,
+         * so that the total mantissa is equivalent to bits 53 to 0 of the
+         * first double immediately followed by bits 53 to 0 of the second.
+         */
+        shift = exponent1 - exponent2 - 53;
+        if (shift > 0) {
+            /* shift more than 64 is undefined behavior */
+            mantissa2 = shift < 64 ? mantissa2 >> shift : 0;
+        }
+        else if (shift < 0) {
+            /*
+             * This only happens if the 2nd value is subnormal.
+             * We expect that shift > -64, but check it anyway
+             */
+            mantissa2 = -shift < 64 ? mantissa2 << -shift : 0;
+        }
+
+        /*
+         * If the low double is a different sign from the high double,
+         * rearrange so that the total mantissa is the sum of the two
+         * mantissas, instead of a subtraction.
+         * hi - lo  ->  (hi-1) + (1-lo),   where lo < 1
+         */
+        if (floatSign1 != floatSign2 && mantissa2 != 0) {
+            mantissa1--;
+            mantissa2 = (1ull << 53) - mantissa2;
+        }
+
+        /*
+         * Compute the number of bits if we are in the subnormal range.
+         * The value "shift" happens to be exactly the number of lost bits.
+         * Also, shift the bits so that the least significant bit is at
+         * bit position 0, like a typical subnormal. After this exponent1
+         * should always be 2^-1022
+         */
+        if (shift < 0) {
+            mantissa2 = (mantissa2 >> -shift) | (mantissa1 << (53 + shift));
+            mantissa1 = mantissa1 >> -shift;
+            mantissaBit = mantissaBit -(-shift);
+            exponent1 -= shift;
+            DEBUG_ASSERT(exponent1 == -1022);
+        }
+
+        /*
+         * set up the BigInt mantissa, by shifting the parts as needed
+         * We can use | instead of + since the mantissas should not overlap
+         */
+        BigInt_Set_2x_uint64(&bigints[0], mantissa1 >> 11,
+                                         (mantissa1 << 53) | (mantissa2));
+        exponent1 = exponent1 - 53;
+    }
+
+    return Format_floatbits(buffer, bufferSize, bigints, exponent1,
+                            signbit, mantissaBit, hasUnequalMargins, opt);
+}
+
+#if defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)
+static npy_uint32
+Dragon4_PrintFloat_IBM_double_double_le(
+    Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt)
+{
+    FloatVal128 val128;
+    FloatUnion128 buf128;
+
+    buf128.floatingPoint = *value;
+    val128.lo = buf128.integer.a;
+    val128.hi = buf128.integer.b;
+
+    return Dragon4_PrintFloat_IBM_double_double(scratch, val128, opt);
+}
+#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE */
+
+#if defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)
+static npy_uint32
+Dragon4_PrintFloat_IBM_double_double_be(
+    Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt)
+{
+    FloatVal128 val128;
+    FloatUnion128 buf128;
+
+    buf128.floatingPoint = *value;
+    val128.hi = buf128.integer.a;
+    val128.lo = buf128.integer.b;
+
+    return Dragon4_PrintFloat_IBM_double_double(scratch, val128, opt);
+}
+
+#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */
+
+#endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE | HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */
+
 #endif /* NPY_FLOAT128 */
 
 
diff --git a/numpy/core/src/multiarray/dragon4.h b/numpy/core/src/multiarray/dragon4.h
index 176b79f69..383a0949d 100644
--- a/numpy/core/src/multiarray/dragon4.h
+++ b/numpy/core/src/multiarray/dragon4.h
@@ -75,6 +75,10 @@
     #define NPY_LONGDOUBLE_BINFMT_NAME Intel_extended128
 #elif defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE)
     #define NPY_LONGDOUBLE_BINFMT_NAME Motorola_extended96
+#elif defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)
+    #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double_le
+#elif defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)
+    #define NPY_LONGDOUBLE_BINFMT_NAME IBM_double_double_be
 #else
     #error No long double representation defined
 #endif
diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h
index 7ee564239..e4a919db6 100644
--- a/numpy/core/src/npymath/npy_math_private.h
+++ b/numpy/core/src/npymath/npy_math_private.h
@@ -434,8 +434,8 @@ do {                                                            \
     typedef npy_uint32 ldouble_sign_t;
 #endif
 
-#if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \
-    !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE)
+#if !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) && \
+    !defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE)
 /* Get the sign bit of x. x should be of type IEEEl2bitsrep */
 #define GET_LDOUBLE_SIGN(x) \
     (((x).a[LDBL_SIGN_INDEX] & LDBL_SIGN_MASK) >> LDBL_SIGN_SHIFT)
diff --git a/numpy/core/src/private/npy_fpmath.h b/numpy/core/src/private/npy_fpmath.h
index e1521de3b..dbb3fb23d 100644
--- a/numpy/core/src/private/npy_fpmath.h
+++ b/numpy/core/src/private/npy_fpmath.h
@@ -14,9 +14,17 @@
       defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \
       defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \
       defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) || \
-      defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \
-      defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE))
+      defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \
+      defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE))
     #error No long double representation defined
 #endif
 
+/* for back-compat, also keep old name for double-double */
+#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE
+    #define HAVE_LDOUBLE_DOUBLE_DOUBLE_LE
+#endif
+#ifdef HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE
+    #define HAVE_LDOUBLE_DOUBLE_DOUBLE_BE
+#endif
+
 #endif
diff --git a/numpy/core/tests/test_scalarprint.py b/numpy/core/tests/test_scalarprint.py
index a20ec9f74..472ff691d 100644
--- a/numpy/core/tests/test_scalarprint.py
+++ b/numpy/core/tests/test_scalarprint.py
@@ -5,10 +5,12 @@
 from __future__ import division, absolute_import, print_function
 
 import code, sys
+import platform
+import pytest
+
 from tempfile import TemporaryFile
 import numpy as np
-from numpy.testing import assert_, assert_equal, suppress_warnings
-
+from numpy.testing import assert_, assert_equal, suppress_warnings, dec
 
 class TestRealScalars(object):
     def test_str(self):
@@ -250,6 +252,66 @@ class TestRealScalars(object):
                          "1.2" if tp != np.float16 else "1.2002")
             assert_equal(fpos(tp('1.'), trim='-'), "1")
 
+    @pytest.mark.skipif(not platform.machine().startswith("ppc64"),
+                        reason="only applies to ppc float128 values")
+    def test_ppc64_ibm_double_double128(self):
+        # check that the precision decreases once we get into the subnormal
+        # range. Unlike float64, this starts around 1e-292 instead of 1e-308,
+        # which happens when the first double is normal and the second is
+        # subnormal.
+        x = np.float128('2.123123123123123123123123123123123e-286')
+        got = [str(x/np.float128('2e' + str(i))) for i in range(0,40)]
+        expected = [
+            "1.06156156156156156156156156156157e-286",
+            "1.06156156156156156156156156156158e-287",
+            "1.06156156156156156156156156156159e-288",
+            "1.0615615615615615615615615615616e-289",
+            "1.06156156156156156156156156156157e-290",
+            "1.06156156156156156156156156156156e-291",
+            "1.0615615615615615615615615615616e-292",
+            "1.0615615615615615615615615615615e-293",
+            "1.061561561561561561561561561562e-294",
+            "1.06156156156156156156156156155e-295",
+            "1.0615615615615615615615615616e-296",
+            "1.06156156156156156156156156e-297",
+            "1.06156156156156156156156157e-298",
+            "1.0615615615615615615615616e-299",
+            "1.06156156156156156156156e-300",
+            "1.06156156156156156156155e-301",
+            "1.0615615615615615615616e-302",
+            "1.061561561561561561562e-303",
+            "1.06156156156156156156e-304",
+            "1.0615615615615615618e-305",
+            "1.06156156156156156e-306",
+            "1.06156156156156157e-307",
+            "1.0615615615615616e-308",
+            "1.06156156156156e-309",
+            "1.06156156156157e-310",
+            "1.0615615615616e-311",
+            "1.06156156156e-312",
+            "1.06156156154e-313",
+            "1.0615615616e-314",
+            "1.06156156e-315",
+            "1.06156155e-316",
+            "1.061562e-317",
+            "1.06156e-318",
+            "1.06155e-319",
+            "1.0617e-320",
+            "1.06e-321",
+            "1.04e-322",
+            "1e-323",
+            "0.0",
+            "0.0"]
+        assert_equal(got, expected)
+
+        # Note: we follow glibc behavior, but it (or gcc) might not be right.
+        # In particular we can get two values that print the same but are not
+        # equal:
+        a = np.float128('2')/np.float128('3')
+        b = np.float128(str(a))
+        assert_equal(str(a), str(b))
+        assert_(a != b)
+
     def float32_roundtrip(self):
         # gh-9360
         x = np.float32(1024 - 2**-14)