/*
 * Copyright (c) 2014 Ryan Juckett
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This file contains a modified version of Ryan Juckett's Dragon4
 * implementation, obtained from http://www.ryanjuckett.com,
 * which has been ported from C++ to C and which has
 * modifications specific to printing floats in numpy.
 *
 * Ryan Juckett's original code was under the Zlib license; he gave numpy
 * permission to include it under the MIT license instead.
 */

#include "dragon4.h"
#include <numpy/npy_common.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>

#if 0
#define DEBUG_ASSERT(stmnt) assert(stmnt)
#else
#define DEBUG_ASSERT(stmnt) do {} while(0)
#endif

static inline npy_uint64
bitmask_u64(npy_uint32 n)
{
    return ~(~((npy_uint64)0) << n);
}

static inline npy_uint32
bitmask_u32(npy_uint32 n)
{
    return ~(~((npy_uint32)0) << n);
}

/*
 * Get the log base 2 of a 32-bit unsigned integer.
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup */ static npy_uint32 LogBase2_32(npy_uint32 val) { static const npy_uint8 logTable[256] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 }; npy_uint32 temp; temp = val >> 24; if (temp) { return 24 + logTable[temp]; } temp = val >> 16; if (temp) { return 16 + logTable[temp]; } temp = val >> 8; if (temp) { return 8 + logTable[temp]; } return logTable[val]; } static npy_uint32 LogBase2_64(npy_uint64 val) { npy_uint64 temp; temp = val >> 32; if (temp) { return 32 + LogBase2_32((npy_uint32)temp); } return LogBase2_32((npy_uint32)val); } #if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || defined(HAVE_LDOUBLE_IEEE_QUAD_BE) static npy_uint32 LogBase2_128(npy_uint64 hi, npy_uint64 lo) { if (hi) { return 64 + LogBase2_64(hi); } return LogBase2_64(lo); } #endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */ /* * Maximum number of 32 bit blocks needed in high precision arithmetic to print * out 128 bit IEEE floating point values. 1023 chosen to be large enough for * 128 bit floats, and BigInt is exactly 4kb (nice for page/cache?) */ #define c_BigInt_MaxBlocks 1023 /* * This structure stores a high precision unsigned integer. It uses a buffer of * 32 bit integer blocks along with a length. The lowest bits of the integer * are stored at the start of the buffer and the length is set to the minimum * value that contains the integer. Thus, there are never any zero blocks at * the end of the buffer. */ typedef struct BigInt { npy_uint32 length; npy_uint32 blocks[c_BigInt_MaxBlocks]; } BigInt; /* * Dummy implementation of a memory manager for BigInts. Currently, only * supports a single call to Dragon4, but that is OK because Dragon4 * does not release the GIL. * * We try to raise an error anyway if dragon4 re-enters, and this code serves * as a placeholder if we want to make it re-entrant in the future. * * Each call to dragon4 uses 7 BigInts. */ #define BIGINT_DRAGON4_GROUPSIZE 7 typedef struct { BigInt bigints[BIGINT_DRAGON4_GROUPSIZE]; char repr[16384]; } Dragon4_Scratch; static int _bigint_static_in_use = 0; static Dragon4_Scratch _bigint_static; static Dragon4_Scratch* get_dragon4_bigint_scratch(void) { /* this test+set is not threadsafe, but no matter because we have GIL */ if (_bigint_static_in_use) { PyErr_SetString(PyExc_RuntimeError, "numpy float printing code is not re-entrant. 
" "Ping the devs to fix it."); return NULL; } _bigint_static_in_use = 1; /* in this dummy implementation we only return the static allocation */ return &_bigint_static; } static void free_dragon4_bigint_scratch(Dragon4_Scratch *mem){ _bigint_static_in_use = 0; } /* Copy integer */ static void BigInt_Copy(BigInt *dst, const BigInt *src) { npy_uint32 length = src->length; npy_uint32 * dstp = dst->blocks; const npy_uint32 *srcp; for (srcp = src->blocks; srcp != src->blocks + length; ++dstp, ++srcp) { *dstp = *srcp; } dst->length = length; } /* Basic type accessors */ static void BigInt_Set_uint64(BigInt *i, npy_uint64 val) { if (val > bitmask_u64(32)) { i->blocks[0] = val & bitmask_u64(32); i->blocks[1] = (val >> 32) & bitmask_u64(32); i->length = 2; } else if (val != 0) { i->blocks[0] = val & bitmask_u64(32); i->length = 1; } else { i->length = 0; } } #if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE) || \ defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || \ defined(HAVE_LDOUBLE_IEEE_QUAD_BE)) static void BigInt_Set_2x_uint64(BigInt *i, npy_uint64 hi, npy_uint64 lo) { if (hi > bitmask_u64(32)) { i->length = 4; } else if (hi != 0) { i->length = 3; } else if (lo > bitmask_u64(32)) { i->length = 2; } else if (lo != 0) { i->length = 1; } else { i->length = 0; } /* Note deliberate fallthrough in this switch */ switch (i->length) { case 4: i->blocks[3] = (hi >> 32) & bitmask_u64(32); case 3: i->blocks[2] = hi & bitmask_u64(32); case 2: i->blocks[1] = (lo >> 32) & bitmask_u64(32); case 1: i->blocks[0] = lo & bitmask_u64(32); } } #endif /* DOUBLE_DOUBLE and QUAD */ static void BigInt_Set_uint32(BigInt *i, npy_uint32 val) { if (val != 0) { i->blocks[0] = val; i->length = 1; } else { i->length = 0; } } /* * Returns 1 if the value is zero */ static int BigInt_IsZero(const BigInt *i) { return i->length == 0; } /* * Returns 1 if the value is even */ static int BigInt_IsEven(const BigInt *i) { return (i->length == 0) || ( (i->blocks[0] % 2) == 0); } /* * Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs) */ static npy_int32 BigInt_Compare(const BigInt *lhs, const BigInt *rhs) { int i; /* A bigger length implies a bigger number. */ npy_int32 lengthDiff = lhs->length - rhs->length; if (lengthDiff != 0) { return lengthDiff; } /* Compare blocks one by one from high to low. 
*/ for (i = lhs->length - 1; i >= 0; --i) { if (lhs->blocks[i] == rhs->blocks[i]) { continue; } else if (lhs->blocks[i] > rhs->blocks[i]) { return 1; } else { return -1; } } /* no blocks differed */ return 0; } /* result = lhs + rhs */ static void BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs) { /* determine which operand has the smaller length */ const BigInt *large, *small; npy_uint64 carry = 0; const npy_uint32 *largeCur, *smallCur, *largeEnd, *smallEnd; npy_uint32 *resultCur; if (lhs->length < rhs->length) { small = lhs; large = rhs; } else { small = rhs; large = lhs; } /* The output will be at least as long as the largest input */ result->length = large->length; /* Add each block and add carry the overflow to the next block */ largeCur = large->blocks; largeEnd = largeCur + large->length; smallCur = small->blocks; smallEnd = smallCur + small->length; resultCur = result->blocks; while (smallCur != smallEnd) { npy_uint64 sum = carry + (npy_uint64)(*largeCur) + (npy_uint64)(*smallCur); carry = sum >> 32; *resultCur = sum & bitmask_u64(32); ++largeCur; ++smallCur; ++resultCur; } /* Add the carry to any blocks that only exist in the large operand */ while (largeCur != largeEnd) { npy_uint64 sum = carry + (npy_uint64)(*largeCur); carry = sum >> 32; (*resultCur) = sum & bitmask_u64(32); ++largeCur; ++resultCur; } /* If there's still a carry, append a new block */ if (carry != 0) { DEBUG_ASSERT(carry == 1); DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) == large->length && (large->length < c_BigInt_MaxBlocks)); *resultCur = 1; result->length = large->length + 1; } else { result->length = large->length; } } /* * result = lhs * rhs */ static void BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs) { const BigInt *large; const BigInt *small; npy_uint32 maxResultLen; npy_uint32 *cur, *end, *resultStart; const npy_uint32 *smallCur; DEBUG_ASSERT(result != lhs && result != rhs); /* determine which operand has the smaller length */ if (lhs->length < rhs->length) { small = lhs; large = rhs; } else { small = rhs; large = lhs; } /* set the maximum possible result length */ maxResultLen = large->length + small->length; DEBUG_ASSERT(maxResultLen <= c_BigInt_MaxBlocks); /* clear the result data */ for (cur = result->blocks, end = cur + maxResultLen; cur != end; ++cur) { *cur = 0; } /* perform standard long multiplication for each small block */ resultStart = result->blocks; for (smallCur = small->blocks; smallCur != small->blocks + small->length; ++smallCur, ++resultStart) { /* * if non-zero, multiply against all the large blocks and add into the * result */ const npy_uint32 multiplier = *smallCur; if (multiplier != 0) { const npy_uint32 *largeCur = large->blocks; npy_uint32 *resultCur = resultStart; npy_uint64 carry = 0; do { npy_uint64 product = (*resultCur) + (*largeCur)*(npy_uint64)multiplier + carry; carry = product >> 32; *resultCur = product & bitmask_u64(32); ++largeCur; ++resultCur; } while(largeCur != large->blocks + large->length); DEBUG_ASSERT(resultCur < result->blocks + maxResultLen); *resultCur = (npy_uint32)(carry & bitmask_u64(32)); } } /* check if the terminating block has no set bits */ if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) { result->length = maxResultLen-1; } else { result->length = maxResultLen; } } /* result = lhs * rhs */ static void BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs) { /* perform long multiplication */ npy_uint32 carry = 0; npy_uint32 *resultCur = result->blocks; const npy_uint32 
*pLhsCur = lhs->blocks; const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length; for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry; *resultCur = (npy_uint32)(product & bitmask_u64(32)); carry = product >> 32; } /* if there is a remaining carry, grow the array */ if (carry != 0) { /* grow the array */ DEBUG_ASSERT(lhs->length + 1 <= c_BigInt_MaxBlocks); *resultCur = (npy_uint32)carry; result->length = lhs->length + 1; } else { result->length = lhs->length; } } /* result = in * 2 */ static void BigInt_Multiply2(BigInt *result, const BigInt *in) { /* shift all the blocks by one */ npy_uint32 carry = 0; npy_uint32 *resultCur = result->blocks; const npy_uint32 *pLhsCur = in->blocks; const npy_uint32 *pLhsEnd = in->blocks + in->length; for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) { npy_uint32 cur = *pLhsCur; *resultCur = (cur << 1) | carry; carry = cur >> 31; } if (carry != 0) { /* grow the array */ DEBUG_ASSERT(in->length + 1 <= c_BigInt_MaxBlocks); *resultCur = carry; result->length = in->length + 1; } else { result->length = in->length; } } /* result = result * 2 */ static void BigInt_Multiply2_inplace(BigInt *result) { /* shift all the blocks by one */ npy_uint32 carry = 0; npy_uint32 *cur = result->blocks; npy_uint32 *end = result->blocks + result->length; for ( ; cur != end; ++cur) { npy_uint32 tmpcur = *cur; *cur = (tmpcur << 1) | carry; carry = tmpcur >> 31; } if (carry != 0) { /* grow the array */ DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks); *cur = carry; ++result->length; } } /* result = result * 10 */ static void BigInt_Multiply10(BigInt *result) { /* multiply all the blocks */ npy_uint64 carry = 0; npy_uint32 *cur = result->blocks; npy_uint32 *end = result->blocks + result->length; for ( ; cur != end; ++cur) { npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry; (*cur) = (npy_uint32)(product & bitmask_u64(32)); carry = product >> 32; } if (carry != 0) { /* grow the array */ DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks); *cur = (npy_uint32)carry; ++result->length; } } static npy_uint32 g_PowerOf10_U32[] = { 1, /* 10 ^ 0 */ 10, /* 10 ^ 1 */ 100, /* 10 ^ 2 */ 1000, /* 10 ^ 3 */ 10000, /* 10 ^ 4 */ 100000, /* 10 ^ 5 */ 1000000, /* 10 ^ 6 */ 10000000, /* 10 ^ 7 */ }; /* * Note: This has a lot of wasted space in the big integer structures of the * early table entries. It wouldn't be terribly hard to make the multiply * function work on integer pointers with an array length instead of * the BigInt struct which would allow us to store a minimal amount of * data here. 
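 *
 * Each entry of the table below holds 10^(8 * 2^i) for table index i, i.e.
 * 10^8, 10^16, 10^32, ..., 10^4096. BigInt_Pow10 and BigInt_MultiplyPow10
 * combine these entries according to the binary representation of the
 * requested exponent.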
*/ static BigInt g_PowerOf10_Big[] = { /* 10 ^ 8 */ { 1, { 100000000 } }, /* 10 ^ 16 */ { 2, { 0x6fc10000, 0x002386f2 } }, /* 10 ^ 32 */ { 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } }, /* 10 ^ 64 */ { 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed, 0xe93ff9f4, 0x00184f03, } }, /* 10 ^ 128 */ { 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01, 0x03df9909, 0x0f1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08, 0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } }, /* 10 ^ 256 */ { 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, 0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, 0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x000553f7, } }, /* 10 ^ 512 */ { 54, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f, 0xb83a8a97, 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1, 0x4475b579, 0x28f8733b, 0xaa1da1bf, 0x703ed321, 0x1e25cfea, 0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae, 0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0, 0x93abade4, 0x1366732a, 0x9449775c, 0x69be5b0e, 0x7343afac, 0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, 0x8a0b1f13, 0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, } }, /* 10 ^ 1024 */ { 107, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b, 0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a, 0xcc2cd819, 0x0ed1440e, 0x896634ee, 0x7de16cfb, 0x1e43f61f, 0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, 0xf451218b, 0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07, 0x62be695a, 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5, 0xba34c411, 0xd814b505, 0xbf3fdeb3, 0x8fc51a16, 0xb1b896bc, 0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb, 0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66, 0xfc8f1f90, 0x976a3310, 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8, 0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, 0x31b5653f, 0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b, 0x1640114a, 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06, 0x73c55349, 0xa7e6bd2a, 0xc1a6970c, 0x47187094, 0xd2db49ef, 0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94, 0xd9d61a05, 0x00000325, } }, /* 10 ^ 2048 */ { 213, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1333e001, 0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354, 0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773, 0xea50d142, 0x51b78db2, 0x98342c9e, 0xc850dabc, 0x866ed6f1, 0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, 0x71c7fd8f, 0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1, 0x9713b449, 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643, 0xe3cb64f3, 0x8074ff01, 0xe3892ee9, 0x10c17f94, 0xa8f16f92, 0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44, 0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974, 0xd6c7d6ab, 0x8e48ff35, 0x4419bd43, 0xc4a65665, 0x685e5510, 0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, 0x982da466, 0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e, 0x1aa9f444, 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62, 0x944545ff, 0x40660460, 0x77c1a42f, 0xc9ac375d, 0xe866d7ef, 0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19, 0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e, 0x785ce6b7, 0x56c6815b, 0x6f46eadb, 0x4eeebeee, 0x195355d8, 0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, 0xde9ec24b, 0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b, 0xfb8886fb, 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943, 0x58ccf45c, 0xf82df846, 0x415c7f3e, 0x2915e818, 0x8b3d5cf4, 0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34, 0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14, 0xe0de0840, 0xf775677c, 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9, 0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, 0x907855ee, 0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d, 0x0af908b4, 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6, 0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, 0x11eb962b, 0x9648b21c, 0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac, 0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, } }, /* 10 ^ 4096 */ { 426, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d, 0x8efe7ae7, 0xf89a1e90, 0xef084117, 0x54e05154, 0x13b1bb51, 0x506be829, 0xfb29b172, 
0xe599574e, 0xf0da6146, 0x806c0ed3, 0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da, 0x1d1f4cce, 0x9b8ba1e8, 0xd6bfdf75, 0xe341be10, 0xc2dfae78, 0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, 0xcab3e6d7, 0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982, 0xf8169c84, 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905, 0x54ab9c41, 0x7613b224, 0x1a1c304e, 0x3bfe167b, 0x441c2d47, 0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e, 0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507, 0x3f8ec72a, 0xfd82373a, 0x95a842bc, 0x280f4d32, 0xf3618ac0, 0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, 0xdcfe388f, 0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c, 0x655fec30, 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed, 0x3b9af26b, 0xc994a450, 0xb5341743, 0x75a697b2, 0xac50b9c1, 0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb, 0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa, 0x176ca776, 0x77fb8af6, 0xeca19beb, 0x92baf1de, 0xaf63b712, 0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, 0x87d1cde8, 0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b, 0x1fa8fd5d, 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925, 0xf984d83e, 0x7a872e74, 0xc19e1faf, 0xed4d542d, 0xecf9b5d0, 0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a, 0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8, 0xab312bb9, 0x1f2b2322, 0x440b3f25, 0xbf627ede, 0x72dac789, 0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, 0xbb9373f4, 0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb, 0x3172826a, 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9, 0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, 0x3d326a7e, 0xeeaf19e6, 0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4, 0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf, 0x9ee3352c, 0x3613c850, 0xfe78d682, 0x788f6e50, 0x5b060904, 0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, 0xa6e9cfda, 0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8, 0x4dcd6cd9, 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc, 0xc32da383, 0xfb126ab1, 0xa03f40a8, 0xed5bf546, 0xe9ce4724, 0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb, 0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999, 0x35e836b5, 0xa96ddeff, 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d, 0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, 0x00fc509b, 0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407, 0x53400d03, 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc, 0x41a4c954, 0xe03b2b63, 0x437b3d7f, 0x97aced8e, 0xcbd66670, 0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df, 0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4, 0x56c02d9a, 0x8c4ef804, 0xc1a08a13, 0x13fd01c8, 0xe6d27671, 0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, 0x544f10cd, 0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb, 0x84e857fd, 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982, 0xaec29e6e, 0x87ec7038, 0x7000a519, 0xaeee333b, 0xff66e42c, 0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e, 0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f, 0xac7867f9, 0x52ec8721, 0xbcdd9d4a, 0x8edd2d00, 0x3557de06, 0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, 0xb6100fab, 0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5, 0x35fcb457, 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21, 0x4964046f, 0x18ae4240, 0xd868b275, 0x8bd2b496, 0x1c5563f4, 0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee, 0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145, 0x00000062, } }, }; /* result = 10^exponent */ static void BigInt_Pow10(BigInt *result, npy_uint32 
exponent, BigInt *temp) { /* use two temporary values to reduce large integer copy operations */ BigInt *curTemp = result; BigInt *pNextTemp = temp; npy_uint32 smallExponent; npy_uint32 tableIdx = 0; /* make sure the exponent is within the bounds of the lookup table data */ DEBUG_ASSERT(exponent < 8192); /* * initialize the result by looking up a 32-bit power of 10 corresponding to * the first 3 bits */ smallExponent = exponent & bitmask_u32(3); BigInt_Set_uint32(curTemp, g_PowerOf10_U32[smallExponent]); /* remove the low bits that we used for the 32-bit lookup table */ exponent >>= 3; /* while there are remaining bits in the exponent to be processed */ while (exponent != 0) { /* if the current bit is set, multiply by this power of 10 */ if (exponent & 1) { BigInt *pSwap; /* multiply into the next temporary */ BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]); /* swap to the next temporary */ pSwap = curTemp; curTemp = pNextTemp; pNextTemp = pSwap; } /* advance to the next bit */ ++tableIdx; exponent >>= 1; } /* output the result */ if (curTemp != result) { BigInt_Copy(result, curTemp); } } /* in = in * 10^exponent */ static void BigInt_MultiplyPow10(BigInt *in, npy_uint32 exponent, BigInt *temp) { /* use two temporary values to reduce large integer copy operations */ BigInt *curTemp, *pNextTemp; npy_uint32 smallExponent; npy_uint32 tableIdx = 0; /* make sure the exponent is within the bounds of the lookup table data */ DEBUG_ASSERT(exponent < 8192); /* * initialize the result by looking up a 32-bit power of 10 corresponding to * the first 3 bits */ smallExponent = exponent & bitmask_u32(3); if (smallExponent != 0) { BigInt_Multiply_int(temp, in, g_PowerOf10_U32[smallExponent]); curTemp = temp; pNextTemp = in; } else { curTemp = in; pNextTemp = temp; } /* remove the low bits that we used for the 32-bit lookup table */ exponent >>= 3; /* while there are remaining bits in the exponent to be processed */ while (exponent != 0) { /* if the current bit is set, multiply by this power of 10 */ if (exponent & 1) { BigInt *pSwap; /* multiply into the next temporary */ BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]); /* swap to the next temporary */ pSwap = curTemp; curTemp = pNextTemp; pNextTemp = pSwap; } /* advance to the next bit */ ++tableIdx; exponent >>= 1; } /* output the result */ if (curTemp != in){ BigInt_Copy(in, curTemp); } } /* result = 2^exponent */ static inline void BigInt_Pow2(BigInt *result, npy_uint32 exponent) { npy_uint32 bitIdx; npy_uint32 blockIdx = exponent / 32; npy_uint32 i; DEBUG_ASSERT(blockIdx < c_BigInt_MaxBlocks); for (i = 0; i <= blockIdx; ++i) { result->blocks[i] = 0; } result->length = blockIdx + 1; bitIdx = (exponent % 32); result->blocks[blockIdx] |= ((npy_uint32)1 << bitIdx); } /* * This function will divide two large numbers under the assumption that the * result is within the range [0,10) and the input numbers have been shifted * to satisfy: * - The highest block of the divisor is greater than or equal to 8 such that * there is enough precision to make an accurate first guess at the quotient. * - The highest block of the divisor is less than the maximum value on an * unsigned 32-bit integer such that we can safely increment without overflow. * - The dividend does not contain more blocks than the divisor such that we * can estimate the quotient by dividing the equivalently placed high blocks. 
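 *
 * Illustrative example (numbers chosen only for exposition): if the divisor
 * has been shifted so that its high block is 2^27 = 134217728 and the
 * dividend's high block is 939524096 (7 * 2^27), the estimate computed below
 * is 939524096 / (134217728 + 1) = 6, which is either the true quotient
 * digit or one less than it.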
 *
 * quotient = floor(dividend / divisor)
 * remainder = dividend - quotient*divisor
 *
 * dividend is updated to be the remainder and the quotient is returned.
 */
static npy_uint32
BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
{
    npy_uint32 length, quotient;
    const npy_uint32 *finalDivisorBlock;
    npy_uint32 *finalDividendBlock;

    /*
     * Check that the divisor has been correctly shifted into range and that it
     * is not smaller than the dividend in length.
     */
    DEBUG_ASSERT(divisor->length > 0 &&
                 divisor->blocks[divisor->length-1] >= 8 &&
                 divisor->blocks[divisor->length-1] < bitmask_u64(32) &&
                 dividend->length <= divisor->length);

    /*
     * If the dividend is smaller than the divisor, the quotient is zero and
     * the dividend is already the remainder.
     */
    length = divisor->length;
    if (dividend->length < divisor->length) {
        return 0;
    }

    finalDivisorBlock = divisor->blocks + length - 1;
    finalDividendBlock = dividend->blocks + length - 1;

    /*
     * Compute an estimated quotient based on the high block value. This will
     * either match the actual quotient or undershoot by one.
     */
    quotient = *finalDividendBlock / (*finalDivisorBlock + 1);
    DEBUG_ASSERT(quotient <= 9);

    /* Divide out the estimated quotient */
    if (quotient != 0) {
        /* dividend = dividend - divisor*quotient */
        const npy_uint32 *divisorCur = divisor->blocks;
        npy_uint32 *dividendCur = dividend->blocks;
        npy_uint64 borrow = 0;
        npy_uint64 carry = 0;
        do {
            npy_uint64 difference, product;

            product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry;
            carry = product >> 32;

            difference = (npy_uint64)*dividendCur
                         - (product & bitmask_u64(32)) - borrow;
            borrow = (difference >> 32) & 1;

            *dividendCur = difference & bitmask_u64(32);

            ++divisorCur;
            ++dividendCur;
        } while(divisorCur <= finalDivisorBlock);

        /* remove all leading zero blocks from dividend */
        while (length > 0 && dividend->blocks[length - 1] == 0) {
            --length;
        }

        dividend->length = length;
    }

    /*
     * If the dividend is still larger than the divisor, we undershot our
     * estimated quotient. To correct, we increment the quotient and subtract
     * one more divisor from the dividend.
 */
    if (BigInt_Compare(dividend, divisor) >= 0) {
        /* dividend = dividend - divisor */
        const npy_uint32 *divisorCur = divisor->blocks;
        npy_uint32 *dividendCur = dividend->blocks;
        npy_uint64 borrow = 0;

        ++quotient;

        do {
            npy_uint64 difference = (npy_uint64)*dividendCur
                                    - (npy_uint64)*divisorCur - borrow;
            borrow = (difference >> 32) & 1;

            *dividendCur = difference & bitmask_u64(32);

            ++divisorCur;
            ++dividendCur;
        } while(divisorCur <= finalDivisorBlock);

        /* remove all leading zero blocks from dividend */
        while (length > 0 && dividend->blocks[length - 1] == 0) {
            --length;
        }

        dividend->length = length;
    }

    return quotient;
}

/* result = result << shift */
static void
BigInt_ShiftLeft(BigInt *result, npy_uint32 shift)
{
    npy_uint32 shiftBlocks = shift / 32;
    npy_uint32 shiftBits = shift % 32;

    /* process blocks high to low so that we can safely process in place */
    const npy_uint32 *pInBlocks = result->blocks;
    npy_int32 inLength = result->length;
    npy_uint32 *pInCur, *pOutCur;

    DEBUG_ASSERT(inLength + shiftBlocks < c_BigInt_MaxBlocks);
    DEBUG_ASSERT(shift != 0);

    /* check if the shift is block aligned */
    if (shiftBits == 0) {
        npy_uint32 i;

        /* copy blocks from high to low */
        for (pInCur = result->blocks + result->length,
                 pOutCur = pInCur + shiftBlocks;
                 pInCur >= pInBlocks;
                 --pInCur, --pOutCur) {
            *pOutCur = *pInCur;
        }

        /* zero the remaining low blocks */
        for (i = 0; i < shiftBlocks; ++i) {
            result->blocks[i] = 0;
        }

        result->length += shiftBlocks;
    }
    /* else we need to shift partial blocks */
    else {
        npy_uint32 i;
        npy_int32 inBlockIdx = inLength - 1;
        npy_uint32 outBlockIdx = inLength + shiftBlocks;

        /* output the initial blocks */
        const npy_uint32 lowBitsShift = (32 - shiftBits);
        npy_uint32 highBits = 0;
        npy_uint32 block = result->blocks[inBlockIdx];
        npy_uint32 lowBits = block >> lowBitsShift;

        /* set the length to hold the shifted blocks */
        DEBUG_ASSERT(outBlockIdx < c_BigInt_MaxBlocks);
        result->length = outBlockIdx + 1;

        while (inBlockIdx > 0) {
            result->blocks[outBlockIdx] = highBits | lowBits;
            highBits = block << shiftBits;

            --inBlockIdx;
            --outBlockIdx;

            block = result->blocks[inBlockIdx];
            lowBits = block >> lowBitsShift;
        }

        /* output the final blocks */
        DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1);
        result->blocks[outBlockIdx] = highBits | lowBits;
        result->blocks[outBlockIdx-1] = block << shiftBits;

        /* zero the remaining low blocks */
        for (i = 0; i < shiftBlocks; ++i) {
            result->blocks[i] = 0;
        }

        /* check if the terminating block has no set bits */
        if (result->blocks[result->length - 1] == 0) {
            --result->length;
        }
    }
}

/*
 * This is an implementation of the Dragon4 algorithm to convert a binary
 * number in floating point format to a decimal number in string format. The
 * function returns the number of digits written to the output buffer and the
 * output is not NUL terminated.
 *
 * The floating point input value is (mantissa * 2^exponent).
 *
 * See the following papers for more information on the algorithm:
 *  "How to Print Floating-Point Numbers Accurately"
 *    Steele and White
 *    http://kurtstephens.com/files/p372-steele.pdf
 *  "Printing Floating-Point Numbers Quickly and Accurately"
 *    Burger and Dybvig
 *    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656
 *
 * This implementation is essentially a port of the "Figure 3" Scheme code from
 * Burger and Dybvig, but with the following additional differences:
 *   1. Instead of finding the highest k such that high < B**k, we search
 *      for the one where v < B**k.
This has a downside that if a power * of 10 exists between v and high, we will output a 9 instead of a 1 as * first digit, violating the "no-carry" guarantee of the paper. This is * accounted for in a new post-processing loop which implements a carry * operation. The upside is one less BigInt multiplication. * 2. The approximate value of k found is offset by a different amount * (0.69), in order to hit the "fast" branch more often. This is * extensively described on Ryan Juckett's website. * 3. The fixed precision mode is much simpler than proposed in the paper. * It simply outputs digits by repeatedly dividing by 10. The new "carry" * loop at the end rounds this output nicely. * There is also some new code to account for details of the BigInt * implementation, which are not present in the paper since it does not specify * details of the integer calculations. * * There is some more documentation of these changes on Ryan Juckett's website * at http://www.ryanjuckett.com/programming/printing-floating-point-numbers/ * * This code also has a few implementation differences from Ryan Juckett's * version: * 1. fixed overflow problems when mantissa was 64 bits (in float128 types), * by replacing multiplication by 2 or 4 by BigInt_ShiftLeft calls. * 2. Increased c_BigInt_MaxBlocks, for 128-bit floats * 3. Added more entries to the g_PowerOf10_Big table, for 128-bit floats. * 4. Added unbiased rounding calculation with isEven. Ryan Juckett's * implementation did not implement "IEEE unbiased rounding", except in the * last digit. This has been added back, following the Burger & Dybvig * code, using the isEven variable. * * Arguments: * * bigints - memory to store all bigints needed (7) for dragon4 computation. * The first BigInt should be filled in with the mantissa. * * exponent - value exponent in base 2 * * mantissaBit - index of the highest set mantissa bit * * hasUnequalMargins - is the high margin twice as large as the low margin * * cutoffMode - how to interpret cutoff_*: fractional or total digits? * * cutoff_max - cut off printing after this many digits. -1 for no cutoff * * cutoff_min - print at least this many digits. -1 for no cutoff * * pOutBuffer - buffer to output into * * bufferSize - maximum characters that can be printed to pOutBuffer * * pOutExponent - the base 10 exponent of the first digit * * Returns the number of digits written to the output buffer. */ static npy_uint32 Dragon4(BigInt *bigints, const npy_int32 exponent, const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins, const DigitMode digitMode, const CutoffMode cutoffMode, npy_int32 cutoff_max, npy_int32 cutoff_min, char *pOutBuffer, npy_uint32 bufferSize, npy_int32 *pOutExponent) { char *curDigit = pOutBuffer; /* * We compute values in integer format by rescaling as * mantissa = scaledValue / scale * marginLow = scaledMarginLow / scale * marginHigh = scaledMarginHigh / scale * Here, marginLow and marginHigh represent 1/2 of the distance to the next * floating point value above/below the mantissa. * * scaledMarginHigh will point to scaledMarginLow in the case they must be * equal to each other, otherwise it will point to optionalMarginHigh. 
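 *
 * Illustrative example (values chosen only for exposition): for an input of
 * 1.5 given as mantissa 3 and exponent -1 with equal margins, the code below
 * sets scaledValue = 6, scale = 4 and scaledMarginLow = 1, so that
 * value = 6/4 = 1.5 and each margin is 1/4, i.e. half of the spacing 2^-1
 * between adjacent values with this exponent.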
*/ BigInt *mantissa = &bigints[0]; /* the only initialized bigint */ BigInt *scale = &bigints[1]; BigInt *scaledValue = &bigints[2]; BigInt *scaledMarginLow = &bigints[3]; BigInt *scaledMarginHigh; BigInt *optionalMarginHigh = &bigints[4]; BigInt *temp1 = &bigints[5]; BigInt *temp2 = &bigints[6]; const npy_float64 log10_2 = 0.30102999566398119521373889472449; npy_int32 digitExponent, hiBlock; npy_int32 cutoff_max_Exponent, cutoff_min_Exponent; npy_uint32 outputDigit; /* current digit being output */ npy_uint32 outputLen; npy_bool isEven = BigInt_IsEven(mantissa); npy_int32 cmp; /* values used to determine how to round */ npy_bool low, high, roundDown; DEBUG_ASSERT(bufferSize > 0); /* if the mantissa is zero, the value is zero regardless of the exponent */ if (BigInt_IsZero(mantissa)) { *curDigit = '0'; *pOutExponent = 0; return 1; } BigInt_Copy(scaledValue, mantissa); if (hasUnequalMargins) { /* if we have no fractional component */ if (exponent > 0) { /* * 1) Expand the input value by multiplying out the mantissa and * exponent. This represents the input value in its whole number * representation. * 2) Apply an additional scale of 2 such that later comparisons * against the margin values are simplified. * 3) Set the margin value to the lowest mantissa bit's scale. */ /* scaledValue = 2 * 2 * mantissa*2^exponent */ BigInt_ShiftLeft(scaledValue, exponent + 2); /* scale = 2 * 2 * 1 */ BigInt_Set_uint32(scale, 4); /* scaledMarginLow = 2 * 2^(exponent-1) */ BigInt_Pow2(scaledMarginLow, exponent); /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */ BigInt_Pow2(optionalMarginHigh, exponent + 1); } /* else we have a fractional exponent */ else { /* * In order to track the mantissa data as an integer, we store it as * is with a large scale */ /* scaledValue = 2 * 2 * mantissa */ BigInt_ShiftLeft(scaledValue, 2); /* scale = 2 * 2 * 2^(-exponent) */ BigInt_Pow2(scale, -exponent + 2); /* scaledMarginLow = 2 * 2^(-1) */ BigInt_Set_uint32(scaledMarginLow, 1); /* scaledMarginHigh = 2 * 2 * 2^(-1) */ BigInt_Set_uint32(optionalMarginHigh, 2); } /* the high and low margins are different */ scaledMarginHigh = optionalMarginHigh; } else { /* if we have no fractional component */ if (exponent > 0) { /* scaledValue = 2 * mantissa*2^exponent */ BigInt_ShiftLeft(scaledValue, exponent + 1); /* scale = 2 * 1 */ BigInt_Set_uint32(scale, 2); /* scaledMarginLow = 2 * 2^(exponent-1) */ BigInt_Pow2(scaledMarginLow, exponent); } /* else we have a fractional exponent */ else { /* * In order to track the mantissa data as an integer, we store it as * is with a large scale */ /* scaledValue = 2 * mantissa */ BigInt_ShiftLeft(scaledValue, 1); /* scale = 2 * 2^(-exponent) */ BigInt_Pow2(scale, -exponent + 1); /* scaledMarginLow = 2 * 2^(-1) */ BigInt_Set_uint32(scaledMarginLow, 1); } /* the high and low margins are equal */ scaledMarginHigh = scaledMarginLow; } /* * Compute an estimate for digitExponent that will be correct or undershoot * by one. This optimization is based on the paper "Printing Floating-Point * Numbers Quickly and Accurately" by Burger and Dybvig * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656 * We perform an additional subtraction of 0.69 to increase the frequency of * a failed estimate because that lets us take a faster branch in the code. * 0.69 is chosen because 0.69 + log10(2) is less than one by a reasonable * epsilon that will account for any floating point error. 
* * We want to set digitExponent to floor(log10(v)) + 1 * v = mantissa*2^exponent * log2(v) = log2(mantissa) + exponent; * log10(v) = log2(v) * log10(2) * floor(log2(v)) = mantissaBit + exponent; * log10(v) - log10(2) < (mantissaBit + exponent) * log10(2) <= log10(v) * log10(v) < (mantissaBit + exponent) * log10(2) + log10(2) * <= log10(v) + log10(2) * floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2)) * <= floor(log10(v)) + 1 * * Warning: This calculation assumes npy_float64 is an IEEE-binary64 * float. This line may need to be updated if this is not the case. */ digitExponent = (npy_int32)( ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69)); /* * if the digit exponent is smaller than the smallest desired digit for * fractional cutoff, pull the digit back into legal range at which point we * will round to the appropriate value. Note that while our value for * digitExponent is still an estimate, this is safe because it only * increases the number. This will either correct digitExponent to an * accurate value or it will clamp it above the accurate value. */ if (cutoff_max >= 0 && cutoffMode == CutoffMode_FractionLength && digitExponent <= -cutoff_max) { digitExponent = -cutoff_max + 1; } /* Divide value by 10^digitExponent. */ if (digitExponent > 0) { /* A positive exponent creates a division so we multiply the scale. */ BigInt_MultiplyPow10(scale, digitExponent, temp1); } else if (digitExponent < 0) { /* * A negative exponent creates a multiplication so we multiply up the * scaledValue, scaledMarginLow and scaledMarginHigh. */ BigInt *temp=temp1, *pow10=temp2; BigInt_Pow10(pow10, -digitExponent, temp); BigInt_Multiply(temp, scaledValue, pow10); BigInt_Copy(scaledValue, temp); BigInt_Multiply(temp, scaledMarginLow, pow10); BigInt_Copy(scaledMarginLow, temp); if (scaledMarginHigh != scaledMarginLow) { BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); } } /* If (value >= 1), our estimate for digitExponent was too low */ if (BigInt_Compare(scaledValue, scale) >= 0) { /* * The exponent estimate was incorrect. * Increment the exponent and don't perform the premultiply needed * for the first loop iteration. */ digitExponent = digitExponent + 1; } else { /* * The exponent estimate was correct. * Multiply larger by the output base to prepare for the first loop * iteration. */ BigInt_Multiply10(scaledValue); BigInt_Multiply10(scaledMarginLow); if (scaledMarginHigh != scaledMarginLow) { BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); } } /* * Compute the cutoff_max exponent (the exponent of the final digit to * print). Default to the maximum size of the output buffer. */ cutoff_max_Exponent = digitExponent - bufferSize; if (cutoff_max >= 0) { npy_int32 desiredCutoffExponent; if (cutoffMode == CutoffMode_TotalLength) { desiredCutoffExponent = digitExponent - cutoff_max; if (desiredCutoffExponent > cutoff_max_Exponent) { cutoff_max_Exponent = desiredCutoffExponent; } } /* Otherwise it's CutoffMode_FractionLength. Print cutoff_max digits * past the decimal point or until we reach the buffer size */ else { desiredCutoffExponent = -cutoff_max; if (desiredCutoffExponent > cutoff_max_Exponent) { cutoff_max_Exponent = desiredCutoffExponent; } } } /* Also compute the cutoff_min exponent. 
*/ cutoff_min_Exponent = digitExponent; if (cutoff_min >= 0) { npy_int32 desiredCutoffExponent; if (cutoffMode == CutoffMode_TotalLength) { desiredCutoffExponent = digitExponent - cutoff_min; if (desiredCutoffExponent < cutoff_min_Exponent) { cutoff_min_Exponent = desiredCutoffExponent; } } else { desiredCutoffExponent = -cutoff_min; if (desiredCutoffExponent < cutoff_min_Exponent) { cutoff_min_Exponent = desiredCutoffExponent; } } } /* Output the exponent of the first digit we will print */ *pOutExponent = digitExponent-1; /* * In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we * need to scale up our values such that the highest block of the * denominator is greater than or equal to 8. We also need to guarantee that * the numerator can never have a length greater than the denominator after * each loop iteration. This requires the highest block of the denominator * to be less than or equal to 429496729 which is the highest number that * can be multiplied by 10 without overflowing to a new block. */ DEBUG_ASSERT(scale->length > 0); hiBlock = scale->blocks[scale->length - 1]; if (hiBlock < 8 || hiBlock > 429496729) { npy_uint32 hiBlockLog2, shift; /* * Perform a bit shift on all values to get the highest block of the * denominator into the range [8,429496729]. We are more likely to make * accurate quotient estimations in * BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator * values so we shift the denominator to place the highest bit at index * 27 of the highest block. This is safe because (2^28 - 1) = 268435455 * which is less than 429496729. This means that all values with a * highest bit at index 27 are within range. */ hiBlockLog2 = LogBase2_32(hiBlock); DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27); shift = (32 + 27 - hiBlockLog2) % 32; BigInt_ShiftLeft(scale, shift); BigInt_ShiftLeft(scaledValue, shift); BigInt_ShiftLeft(scaledMarginLow, shift); if (scaledMarginHigh != scaledMarginLow) { BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); } } if (digitMode == DigitMode_Unique) { /* * For the unique cutoff mode, we will try to print until we have * reached a level of precision that uniquely distinguishes this value * from its neighbors. If we run out of space in the output buffer, we * terminate early. */ for (;;) { BigInt *scaledValueHigh = temp1; digitExponent = digitExponent-1; /* divide out the scale to extract the digit */ outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); DEBUG_ASSERT(outputDigit < 10); /* update the high end of the value */ BigInt_Add(scaledValueHigh, scaledValue, scaledMarginHigh); /* * stop looping if we are far enough away from our neighboring * values (and we have printed at least the requested minimum * digits) or if we have reached the cutoff digit */ cmp = BigInt_Compare(scaledValue, scaledMarginLow); low = isEven ? (cmp <= 0) : (cmp < 0); cmp = BigInt_Compare(scaledValueHigh, scale); high = isEven ? (cmp >= 0) : (cmp > 0); if (((low | high) & (digitExponent <= cutoff_min_Exponent)) | (digitExponent == cutoff_max_Exponent)) { break; } /* store the output digit */ *curDigit = (char)('0' + outputDigit); ++curDigit; /* multiply larger by the output base */ BigInt_Multiply10(scaledValue); BigInt_Multiply10(scaledMarginLow); if (scaledMarginHigh != scaledMarginLow) { BigInt_Multiply2(scaledMarginHigh, scaledMarginLow); } } } else { /* * For exact digit mode, we will try to print until we * have exhausted all precision (i.e. 
all remaining digits are zeros) or * until we reach the desired cutoff digit. */ low = NPY_FALSE; high = NPY_FALSE; for (;;) { digitExponent = digitExponent-1; /* divide out the scale to extract the digit */ outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(scaledValue, scale); DEBUG_ASSERT(outputDigit < 10); if ((scaledValue->length == 0) | (digitExponent == cutoff_max_Exponent)) { break; } /* store the output digit */ *curDigit = (char)('0' + outputDigit); ++curDigit; /* multiply larger by the output base */ BigInt_Multiply10(scaledValue); } } /* default to rounding down the final digit if value got too close to 0 */ roundDown = low; /* if it is legal to round up and down */ if (low == high) { npy_int32 compare; /* * round to the closest digit by comparing value with 0.5. To do this we * need to convert the inequality to large integer values. * compare( value, 0.5 ) * compare( scale * value, scale * 0.5 ) * compare( 2 * scale * value, scale ) */ BigInt_Multiply2_inplace(scaledValue); compare = BigInt_Compare(scaledValue, scale); roundDown = compare < 0; /* * if we are directly in the middle, round towards the even digit (i.e. * IEEE rounding rules) */ if (compare == 0) { roundDown = (outputDigit & 1) == 0; } } /* print the rounded digit */ if (roundDown) { *curDigit = (char)('0' + outputDigit); ++curDigit; } else { /* handle rounding up */ if (outputDigit == 9) { /* find the first non-nine prior digit */ for (;;) { /* if we are at the first digit */ if (curDigit == pOutBuffer) { /* output 1 at the next highest exponent */ *curDigit = '1'; ++curDigit; *pOutExponent += 1; break; } --curDigit; if (*curDigit != '9') { /* increment the digit */ *curDigit += 1; ++curDigit; break; } } } else { /* values in the range [0,8] can perform a simple round up */ *curDigit = (char)('0' + outputDigit + 1); ++curDigit; } } /* return the number of digits output */ outputLen = (npy_uint32)(curDigit - pOutBuffer); DEBUG_ASSERT(outputLen <= bufferSize); return outputLen; } /* * The FormatPositional and FormatScientific functions have been more * significantly rewritten relative to Ryan Juckett's code. * * The binary16 and the various 128-bit float functions are new, and adapted * from the 64 bit version. The python interface functions are new. */ /* Options struct for easy passing of Dragon4 options. * * scientific - boolean controlling whether scientific notation is used * digit_mode - whether to use unique or fixed fractional output * cutoff_mode - whether 'precision' refers to all digits, or digits past * the decimal point. * precision - When negative, prints as many digits as needed for a unique * number. When positive specifies the maximum number of * significant digits to print. * sign - whether to always show sign * trim_mode - how to treat trailing 0s and '.'. See TrimMode comments. * digits_left - pad characters to left of decimal point. -1 for no padding * digits_right - pad characters to right of decimal point. -1 for no padding. * Padding adds whitespace until there are the specified * number characters to sides of decimal point. Applies after * trim_mode characters were removed. If digits_right is * positive and the decimal point was trimmed, decimal point * will be replaced by a whitespace character. * exp_digits - Only affects scientific output. If positive, pads the * exponent with 0s until there are this many digits. If * negative, only use sufficient digits. 
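 * min_digits - minimum number of digits to print. -1 for no minimum. Passed
 *              to Dragon4 as cutoff_min; in unique digit mode it also serves
 *              as the trailing-zero padding target for TrimMode_None.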
*/ typedef struct Dragon4_Options { npy_bool scientific; DigitMode digit_mode; CutoffMode cutoff_mode; npy_int32 precision; npy_int32 min_digits; npy_bool sign; TrimMode trim_mode; npy_int32 digits_left; npy_int32 digits_right; npy_int32 exp_digits; } Dragon4_Options; /* * Outputs the positive number with positional notation: ddddd.dddd * The output is always NUL terminated and the output length (not including the * NUL) is returned. * * Arguments: * buffer - buffer to output into * bufferSize - maximum characters that can be printed to buffer * mantissa - value significand * exponent - value exponent in base 2 * signbit - value of the sign position. Should be '+', '-' or '' * mantissaBit - index of the highest set mantissa bit * hasUnequalMargins - is the high margin twice as large as the low margin * * See Dragon4_Options for description of remaining arguments. */ static npy_uint32 FormatPositional(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, DigitMode digit_mode, CutoffMode cutoff_mode, npy_int32 precision, npy_int32 min_digits, TrimMode trim_mode, npy_int32 digits_left, npy_int32 digits_right) { npy_int32 printExponent; npy_int32 numDigits, numWholeDigits=0, has_sign=0; npy_int32 add_digits; npy_int32 maxPrintLen = (npy_int32)bufferSize - 1, pos = 0; /* track the # of digits past the decimal point that have been printed */ npy_int32 numFractionDigits = 0, desiredFractionalDigits; DEBUG_ASSERT(bufferSize > 0); if (digit_mode != DigitMode_Unique) { DEBUG_ASSERT(precision >= 0); } if (signbit == '+' && pos < maxPrintLen) { buffer[pos++] = '+'; has_sign = 1; } else if (signbit == '-' && pos < maxPrintLen) { buffer[pos++] = '-'; has_sign = 1; } numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, cutoff_mode, precision, min_digits, buffer + has_sign, maxPrintLen - has_sign, &printExponent); DEBUG_ASSERT(numDigits > 0); DEBUG_ASSERT(numDigits <= bufferSize); /* if output has a whole number */ if (printExponent >= 0) { /* leave the whole number at the start of the buffer */ numWholeDigits = printExponent+1; if (numDigits <= numWholeDigits) { npy_int32 count = numWholeDigits - numDigits; pos += numDigits; /* don't overflow the buffer */ if (pos + count > maxPrintLen) { count = maxPrintLen - pos; } /* add trailing zeros up to the decimal point */ numDigits += count; for ( ; count > 0; count--) { buffer[pos++] = '0'; } } /* insert the decimal point prior to the fraction */ else if (numDigits > numWholeDigits) { npy_int32 maxFractionDigits; numFractionDigits = numDigits - numWholeDigits; maxFractionDigits = maxPrintLen - numWholeDigits - 1 - pos; if (numFractionDigits > maxFractionDigits) { numFractionDigits = maxFractionDigits; } memmove(buffer + pos + numWholeDigits + 1, buffer + pos + numWholeDigits, numFractionDigits); pos += numWholeDigits; buffer[pos] = '.'; numDigits = numWholeDigits + 1 + numFractionDigits; pos += 1 + numFractionDigits; } } else { /* shift out the fraction to make room for the leading zeros */ npy_int32 numFractionZeros = 0; if (pos + 2 < maxPrintLen) { npy_int32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i; maxFractionZeros = maxPrintLen - 2 - pos; numFractionZeros = -(printExponent + 1); if (numFractionZeros > maxFractionZeros) { numFractionZeros = maxFractionZeros; } digitsStartIdx = 2 + numFractionZeros; /* * shift the significant digits right such that there is room for * leading zeros */ numFractionDigits = numDigits; 
maxFractionDigits = maxPrintLen - digitsStartIdx - pos; if (numFractionDigits > maxFractionDigits) { numFractionDigits = maxFractionDigits; } memmove(buffer + pos + digitsStartIdx, buffer + pos, numFractionDigits); /* insert the leading zeros */ for (i = 2; i < digitsStartIdx; ++i) { buffer[pos + i] = '0'; } /* update the counts */ numFractionDigits += numFractionZeros; numDigits = numFractionDigits; } /* add the decimal point */ if (pos + 1 < maxPrintLen) { buffer[pos+1] = '.'; } /* add the initial zero */ if (pos < maxPrintLen) { buffer[pos] = '0'; numDigits += 1; } numWholeDigits = 1; pos += 2 + numFractionDigits; } /* always add decimal point, except for DprZeros mode */ if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) { buffer[pos++] = '.'; } add_digits = digit_mode == DigitMode_Unique ? min_digits : precision; desiredFractionalDigits = add_digits < 0 ? 0 : add_digits; if (cutoff_mode == CutoffMode_TotalLength) { desiredFractionalDigits = add_digits - numWholeDigits; } if (trim_mode == TrimMode_LeaveOneZero) { /* if we didn't print any fractional digits, add a trailing 0 */ if (numFractionDigits == 0 && pos < maxPrintLen) { buffer[pos++] = '0'; numFractionDigits++; } } else if (trim_mode == TrimMode_None && desiredFractionalDigits > numFractionDigits && pos < maxPrintLen) { /* add trailing zeros up to add_digits length */ /* compute the number of trailing zeros needed */ npy_int32 count = desiredFractionalDigits - numFractionDigits; if (pos + count > maxPrintLen) { count = maxPrintLen - pos; } numFractionDigits += count; for ( ; count > 0; count--) { buffer[pos++] = '0'; } } /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */ /* * when rounding, we may still end up with trailing zeros. Remove them * depending on trim settings. */ if (trim_mode != TrimMode_None && numFractionDigits > 0) { while (buffer[pos-1] == '0') { pos--; numFractionDigits--; } if (buffer[pos-1] == '.') { /* in TrimMode_LeaveOneZero, add trailing 0 back */ if (trim_mode == TrimMode_LeaveOneZero){ buffer[pos++] = '0'; numFractionDigits++; } /* in TrimMode_DptZeros, remove trailing decimal point */ else if (trim_mode == TrimMode_DptZeros) { pos--; } } } /* add any whitespace padding to right side */ if (digits_right >= numFractionDigits) { npy_int32 count = digits_right - numFractionDigits; /* in trim_mode DptZeros, if right padding, add a space for the . */ if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0 && pos < maxPrintLen) { buffer[pos++] = ' '; } if (pos + count > maxPrintLen) { count = maxPrintLen - pos; } for ( ; count > 0; count--) { buffer[pos++] = ' '; } } /* add any whitespace padding to left side */ if (digits_left > numWholeDigits + has_sign) { npy_int32 shift = digits_left - (numWholeDigits + has_sign); npy_int32 count = pos; if (count + shift > maxPrintLen) { count = maxPrintLen - shift; } if (count > 0) { memmove(buffer + shift, buffer, count); } pos = shift + count; for ( ; shift > 0; shift--) { buffer[shift - 1] = ' '; } } /* terminate the buffer */ DEBUG_ASSERT(pos <= maxPrintLen); buffer[pos] = '\0'; return pos; } /* * Outputs the positive number with scientific notation: d.dddde[sign]ddd * The output is always NUL terminated and the output length (not including the * NUL) is returned. * * Arguments: * buffer - buffer to output into * bufferSize - maximum characters that can be printed to buffer * mantissa - value significand * exponent - value exponent in base 2 * signbit - value of the sign position. 
Should be '+', '-' or '' * mantissaBit - index of the highest set mantissa bit * hasUnequalMargins - is the high margin twice as large as the low margin * * See Dragon4_Options for description of remaining arguments. */ static npy_uint32 FormatScientific (char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, DigitMode digit_mode, npy_int32 precision, npy_int32 min_digits, TrimMode trim_mode, npy_int32 digits_left, npy_int32 exp_digits) { npy_int32 printExponent; npy_int32 numDigits; char *pCurOut; npy_int32 numFractionDigits; npy_int32 leftchars; npy_int32 add_digits; if (digit_mode != DigitMode_Unique) { DEBUG_ASSERT(precision >= 0); } DEBUG_ASSERT(bufferSize > 0); pCurOut = buffer; /* add any whitespace padding to left side */ leftchars = 1 + (signbit == '-' || signbit == '+'); if (digits_left > leftchars) { int i; for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++) { *pCurOut = ' '; pCurOut++; --bufferSize; } } if (signbit == '+' && bufferSize > 1) { *pCurOut = '+'; pCurOut++; --bufferSize; } else if (signbit == '-' && bufferSize > 1) { *pCurOut = '-'; pCurOut++; --bufferSize; } numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins, digit_mode, CutoffMode_TotalLength, precision < 0 ? -1 : precision + 1, min_digits < 0 ? -1 : min_digits + 1, pCurOut, bufferSize, &printExponent); DEBUG_ASSERT(numDigits > 0); DEBUG_ASSERT(numDigits <= bufferSize); /* keep the whole number as the first digit */ if (bufferSize > 1) { pCurOut += 1; bufferSize -= 1; } /* insert the decimal point prior to the fractional number */ numFractionDigits = numDigits-1; if (numFractionDigits > 0 && bufferSize > 1) { npy_int32 maxFractionDigits = (npy_int32)bufferSize - 2; if (numFractionDigits > maxFractionDigits) { numFractionDigits = maxFractionDigits; } memmove(pCurOut + 1, pCurOut, numFractionDigits); pCurOut[0] = '.'; pCurOut += (1 + numFractionDigits); bufferSize -= (1 + numFractionDigits); } /* always add decimal point, except for DptZeros mode */ if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 && bufferSize > 1) { *pCurOut = '.'; ++pCurOut; --bufferSize; } add_digits = digit_mode == DigitMode_Unique ? min_digits : precision; add_digits = add_digits < 0 ? 0 : add_digits; if (trim_mode == TrimMode_LeaveOneZero) { /* if we didn't print any fractional digits, add the 0 */ if (numFractionDigits == 0 && bufferSize > 1) { *pCurOut = '0'; ++pCurOut; --bufferSize; ++numFractionDigits; } } else if (trim_mode == TrimMode_None) { /* add trailing zeros up to add_digits length */ if (add_digits > (npy_int32)numFractionDigits) { char *pEnd; /* compute the number of trailing zeros needed */ npy_int32 numZeros = (add_digits - numFractionDigits); if (numZeros > (npy_int32)bufferSize - 1) { numZeros = (npy_int32)bufferSize - 1; } for (pEnd = pCurOut + numZeros; pCurOut < pEnd; ++pCurOut) { *pCurOut = '0'; ++numFractionDigits; } } } /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */ /* * when rounding, we may still end up with trailing zeros. Remove them * depending on trim settings.
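* For instance, cutting 0.25 off at 5 significant digits first yields "2.5000e-01"; TrimMode_Zeros then trims this down to "2.5e-01".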
*/ if (trim_mode != TrimMode_None && numFractionDigits > 0) { --pCurOut; while (*pCurOut == '0') { --pCurOut; ++bufferSize; --numFractionDigits; } if (trim_mode == TrimMode_LeaveOneZero && *pCurOut == '.') { ++pCurOut; *pCurOut = '0'; --bufferSize; ++numFractionDigits; } ++pCurOut; } /* print the exponent into a local buffer and copy into output buffer */ if (bufferSize > 1) { char exponentBuffer[7]; npy_int32 digits[5]; npy_int32 i, exp_size, count; if (exp_digits > 5) { exp_digits = 5; } if (exp_digits < 0) { exp_digits = 2; } exponentBuffer[0] = 'e'; if (printExponent >= 0) { exponentBuffer[1] = '+'; } else { exponentBuffer[1] = '-'; printExponent = -printExponent; } DEBUG_ASSERT(printExponent < 100000); /* get exp digits */ for (i = 0; i < 5; i++) { digits[i] = printExponent % 10; printExponent /= 10; } /* count back over leading zeros */ for (i = 5; i > exp_digits && digits[i-1] == 0; i--) { } exp_size = i; /* write remaining digits to tmp buf */ for (i = exp_size; i > 0; i--) { exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]); } /* copy the exponent buffer into the output */ count = exp_size + 2; if (count > (npy_int32)bufferSize - 1) { count = (npy_int32)bufferSize - 1; } memcpy(pCurOut, exponentBuffer, count); pCurOut += count; bufferSize -= count; } DEBUG_ASSERT(bufferSize > 0); pCurOut[0] = '\0'; return pCurOut - buffer; } /* * Print a hexadecimal value with a given width. * The output string is always NUL terminated and the string length (not * including the NUL) is returned. */ /* Unused for now static npy_uint32 PrintHex(char * buffer, npy_uint32 bufferSize, npy_uint64 value, npy_uint32 width) { const char digits[] = "0123456789abcdef"; char *pCurOut; DEBUG_ASSERT(bufferSize > 0); npy_uint32 maxPrintLen = bufferSize-1; if (width > maxPrintLen) { width = maxPrintLen; } pCurOut = buffer; while (width > 0) { --width; npy_uint8 digit = (npy_uint8)((value >> 4ull*(npy_uint64)width) & 0xF); *pCurOut = digits[digit]; ++pCurOut; } *pCurOut = '\0'; return pCurOut - buffer; } */ /* * Print special case values for infinities and NaNs. * The output string is always NUL terminated and the string length (not * including the NUL) is returned. */ static npy_uint32 PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa, npy_uint32 mantissaHexWidth, char signbit) { npy_uint32 maxPrintLen = bufferSize-1; npy_uint32 pos = 0; DEBUG_ASSERT(bufferSize > 0); /* Check for infinity */ if (mantissa == 0) { npy_uint32 printLen; /* only print sign for inf values (though nan can have a sign set) */ if (signbit == '+') { if (pos < maxPrintLen-1) { buffer[pos++] = '+'; } } else if (signbit == '-') { if (pos < maxPrintLen-1) { buffer[pos++] = '-'; } } /* copy and make sure the buffer is terminated */ printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos; memcpy(buffer + pos, "inf", printLen); buffer[pos + printLen] = '\0'; return pos + printLen; } else { /* copy and make sure the buffer is terminated */ npy_uint32 printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos; memcpy(buffer + pos, "nan", printLen); buffer[pos + printLen] = '\0'; /* * For numpy we ignore unusual mantissa values for nan, but keep this * code in case we change our mind later. * * // append HEX value * if (maxPrintLen > 3) { * printLen += PrintHex(buffer+3, bufferSize-3, mantissa, * mantissaHexWidth); * } */ return pos + printLen; } } /* * The functions below format a floating-point number, stored in a particular * binary format, as a decimal string.
The output string is always NUL terminated * and the string length (not including the NUL) is returned. * * For 16, 32 and 64 bit floats we assume they are the IEEE 754 type. * For 128 bit floats we account for different definitions. * * Arguments are: * buffer - buffer to output into * bufferSize - maximum characters that can be printed to buffer * value - value to print * opt - Dragon4 options, see above */ /* * Helper function that takes Dragon4 parameters and options and * calls Dragon4. */ static npy_uint32 Format_floatbits(char *buffer, npy_uint32 bufferSize, BigInt *mantissa, npy_int32 exponent, char signbit, npy_uint32 mantissaBit, npy_bool hasUnequalMargins, Dragon4_Options *opt) { /* format the value */ if (opt->scientific) { return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit, hasUnequalMargins, opt->digit_mode, opt->precision, opt->min_digits, opt->trim_mode, opt->digits_left, opt->exp_digits); } else { return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit, mantissaBit, hasUnequalMargins, opt->digit_mode, opt->cutoff_mode, opt->precision, opt->min_digits, opt->trim_mode, opt->digits_left, opt->digits_right); } } /* * IEEE binary16 floating-point format * * sign: 1 bit * exponent: 5 bits * mantissa: 10 bits */ static npy_uint32 Dragon4_PrintFloat_IEEE_binary16( Dragon4_Scratch *scratch, npy_half *value, Dragon4_Options *opt) { char *buffer = scratch->repr; const npy_uint32 bufferSize = sizeof(scratch->repr); BigInt *bigints = scratch->bigints; npy_uint16 val = *value; npy_uint32 floatExponent, floatMantissa, floatSign; npy_uint32 mantissa; npy_int32 exponent; npy_uint32 mantissaBit; npy_bool hasUnequalMargins; char signbit = '\0'; /* deconstruct the floating point value */ floatMantissa = val & bitmask_u32(10); floatExponent = (val >> 10) & bitmask_u32(5); floatSign = val >> 15; /* output the sign */ if (floatSign != 0) { signbit = '-'; } else if (opt->sign) { signbit = '+'; } /* if this is a special value */ if (floatExponent == bitmask_u32(5)) { return PrintInfNan(buffer, bufferSize, floatMantissa, 3, signbit); } /* else this is a number */ /* factor the value into its parts */ if (floatExponent != 0) { /* * normalized * The floating point equation is: * value = (1 + mantissa/2^10) * 2 ^ (exponent-15) * We convert the integer equation by factoring a 2^10 out of the * exponent * value = (1 + mantissa/2^10) * 2^10 * 2 ^ (exponent-15-10) * value = (2^10 + mantissa) * 2 ^ (exponent-15-10) * Because of the implied 1 in front of the mantissa we have 11 bits of * precision. * m = (2^10 + mantissa) * e = (exponent-15-10) */ mantissa = (1UL << 10) | floatMantissa; exponent = floatExponent - 15 - 10; mantissaBit = 10; hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0); } else { /* * denormalized * The floating point equation is: * value = (mantissa/2^10) * 2 ^ (1-15) * We convert the integer equation by factoring a 2^10 out of the * exponent * value = (mantissa/2^10) * 2^10 * 2 ^ (1-15-10) * value = mantissa * 2 ^ (1-15-10) * We have up to 10 bits of precision.
* m = (mantissa) * e = (1-15-10) */ mantissa = floatMantissa; exponent = 1 - 15 - 10; mantissaBit = LogBase2_32(mantissa); hasUnequalMargins = NPY_FALSE; } BigInt_Set_uint32(&bigints[0], mantissa); return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit, hasUnequalMargins, opt); } /* * IEEE binary32 floating-point format * * sign: 1 bit * exponent: 8 bits * mantissa: 23 bits */ static npy_uint32 Dragon4_PrintFloat_IEEE_binary32( Dragon4_Scratch *scratch, npy_float32 *value, Dragon4_Options *opt) { char *buffer = scratch->repr; const npy_uint32 bufferSize = sizeof(scratch->repr); BigInt *bigints = scratch->bigints; union { npy_float32 floatingPoint; npy_uint32 integer; } floatUnion; npy_uint32 floatExponent, floatMantissa, floatSign; npy_uint32 mantissa; npy_int32 exponent; npy_uint32 mantissaBit; npy_bool hasUnequalMargins; char signbit = '\0'; /* deconstruct the floating point value */ floatUnion.floatingPoint = *value; floatMantissa = floatUnion.integer & bitmask_u32(23); floatExponent = (floatUnion.integer >> 23) & bitmask_u32(8); floatSign = floatUnion.integer >> 31; /* output the sign */ if (floatSign != 0) { signbit = '-'; } else if (opt->sign) { signbit = '+'; } /* if this is a special value */ if (floatExponent == bitmask_u32(8)) { return PrintInfNan(buffer, bufferSize, floatMantissa, 6, signbit); } /* else this is a number */ /* factor the value into its parts */ if (floatExponent != 0) { /* * normalized * The floating point equation is: * value = (1 + mantissa/2^23) * 2 ^ (exponent-127) * We convert the integer equation by factoring a 2^23 out of the * exponent * value = (1 + mantissa/2^23) * 2^23 * 2 ^ (exponent-127-23) * value = (2^23 + mantissa) * 2 ^ (exponent-127-23) * Because of the implied 1 in front of the mantissa we have 24 bits of * precision. * m = (2^23 + mantissa) * e = (exponent-127-23) */ mantissa = (1UL << 23) | floatMantissa; exponent = floatExponent - 127 - 23; mantissaBit = 23; hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0); } else { /* * denormalized * The floating point equation is: * value = (mantissa/2^23) * 2 ^ (1-127) * We convert the integer equation by factoring a 2^23 out of the * exponent * value = (mantissa/2^23) * 2^23 * 2 ^ (1-127-23) * value = mantissa * 2 ^ (1-127-23) * We have up to 23 bits of precision. 
* m = (mantissa) * e = (1-127-23) */ mantissa = floatMantissa; exponent = 1 - 127 - 23; mantissaBit = LogBase2_32(mantissa); hasUnequalMargins = NPY_FALSE; } BigInt_Set_uint32(&bigints[0], mantissa); return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit, hasUnequalMargins, opt); } /* * IEEE binary64 floating-point format * * sign: 1 bit * exponent: 11 bits * mantissa: 52 bits */ static npy_uint32 Dragon4_PrintFloat_IEEE_binary64( Dragon4_Scratch *scratch, npy_float64 *value, Dragon4_Options *opt) { char *buffer = scratch->repr; const npy_uint32 bufferSize = sizeof(scratch->repr); BigInt *bigints = scratch->bigints; union { npy_float64 floatingPoint; npy_uint64 integer; } floatUnion; npy_uint32 floatExponent, floatSign; npy_uint64 floatMantissa; npy_uint64 mantissa; npy_int32 exponent; npy_uint32 mantissaBit; npy_bool hasUnequalMargins; char signbit = '\0'; /* deconstruct the floating point value */ floatUnion.floatingPoint = *value; floatMantissa = floatUnion.integer & bitmask_u64(52); floatExponent = (floatUnion.integer >> 52) & bitmask_u32(11); floatSign = floatUnion.integer >> 63; /* output the sign */ if (floatSign != 0) { signbit = '-'; } else if (opt->sign) { signbit = '+'; } /* if this is a special value */ if (floatExponent == bitmask_u32(11)) { return PrintInfNan(buffer, bufferSize, floatMantissa, 13, signbit); } /* else this is a number */ /* factor the value into its parts */ if (floatExponent != 0) { /* * normal * The floating point equation is: * value = (1 + mantissa/2^52) * 2 ^ (exponent-1023) * We convert the integer equation by factoring a 2^52 out of the * exponent * value = (1 + mantissa/2^52) * 2^52 * 2 ^ (exponent-1023-52) * value = (2^52 + mantissa) * 2 ^ (exponent-1023-52) * Because of the implied 1 in front of the mantissa we have 53 bits of * precision. * m = (2^52 + mantissa) * e = (exponent-1023+1-53) */ mantissa = (1ull << 52) | floatMantissa; exponent = floatExponent - 1023 - 52; mantissaBit = 52; hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0); } else { /* * subnormal * The floating point equation is: * value = (mantissa/2^52) * 2 ^ (1-1023) * We convert the integer equation by factoring a 2^52 out of the * exponent * value = (mantissa/2^52) * 2^52 * 2 ^ (1-1023-52) * value = mantissa * 2 ^ (1-1023-52) * We have up to 52 bits of precision. * m = (mantissa) * e = (1-1023-52) */ mantissa = floatMantissa; exponent = 1 - 1023 - 52; mantissaBit = LogBase2_64(mantissa); hasUnequalMargins = NPY_FALSE; } BigInt_Set_uint64(&bigints[0], mantissa); return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit, hasUnequalMargins, opt); } /* * Since systems have different types of long doubles, and may not necessarily * have a 128-bit format we can use to pass values around, here we create * our own 128-bit storage type for convenience. */ typedef struct FloatVal128 { npy_uint64 hi, lo; } FloatVal128; #if defined(HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE) || \ defined(HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE) || \ defined(HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE) || \ defined(HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE) /* * Intel's 80-bit IEEE extended precision floating-point format * * "long doubles" with this format are stored as 96 or 128 bits, but * are equivalent to the 80 bit type with some zero padding on the high bits. * This method expects the user to pass in the value using a 128-bit * FloatVal128, so can support 80, 96, or 128 bit storage formats, * and is endian-independent.
* * sign: 1 bit, second u64 * exponent: 15 bits, second u64 * intbit: 1 bit, first u64 * mantissa: 63 bits, first u64 */ static npy_uint32 Dragon4_PrintFloat_Intel_extended( Dragon4_Scratch *scratch, FloatVal128 value, Dragon4_Options *opt) { char *buffer = scratch->repr; const npy_uint32 bufferSize = sizeof(scratch->repr); BigInt *bigints = scratch->bigints; npy_uint32 floatExponent, floatSign; npy_uint64 floatMantissa; npy_uint64 mantissa; npy_int32 exponent; npy_uint32 mantissaBit; npy_bool hasUnequalMargins; char signbit = '\0'; /* deconstruct the floating point value (we ignore the intbit) */ floatMantissa = value.lo & bitmask_u64(63); floatExponent = value.hi & bitmask_u32(15); floatSign = (value.hi >> 15) & 0x1; /* output the sign */ if (floatSign != 0) { signbit = '-'; } else if (opt->sign) { signbit = '+'; } /* if this is a special value */ if (floatExponent == bitmask_u32(15)) { /* * Note: Technically there are other special extended values defined if * the intbit is 0, like Pseudo-Infinity, Pseudo-NaN, Quiet-NaN. We * ignore all of these since they are not generated on modern * processors. We treat Quiet-NaN as simply NaN. */ return PrintInfNan(buffer, bufferSize, floatMantissa, 16, signbit); } /* else this is a number */ /* factor the value into its parts */ if (floatExponent != 0) { /* * normal * The floating point equation is: * value = (1 + mantissa/2^63) * 2 ^ (exponent-16383) * We convert the integer equation by factoring a 2^63 out of the * exponent * value = (1 + mantissa/2^63) * 2^63 * 2 ^ (exponent-16383-63) * value = (2^63 + mantissa) * 2 ^ (exponent-16383-63) * Because of the implied 1 in front of the mantissa we have 64 bits of * precision. * m = (2^63 + mantissa) * e = (exponent-16383+1-64) */ mantissa = (1ull << 63) | floatMantissa; exponent = floatExponent - 16383 - 63; mantissaBit = 63; hasUnequalMargins = (floatExponent != 1) && (floatMantissa == 0); } else { /* * subnormal * The floating point equation is: * value = (mantissa/2^63) * 2 ^ (1-16383) * We convert the integer equation by factoring a 2^63 out of the * exponent * value = (mantissa/2^63) * 2^63 * 2 ^ (1-16383-63) * value = mantissa * 2 ^ (1-16383-63) * We have up to 63 bits of precision. * m = (mantissa) * e = (1-16383-63) */ mantissa = floatMantissa; exponent = 1 - 16383 - 63; mantissaBit = LogBase2_64(mantissa); hasUnequalMargins = NPY_FALSE; } BigInt_Set_uint64(&bigints[0], mantissa); return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit, hasUnequalMargins, opt); } #endif /* INTEL_EXTENDED group */ #ifdef HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE /* * Intel's 80-bit IEEE extended precision format, 80-bit storage * * Note: It is not clear if a long double with 10-byte storage exists on any * system. But numpy defines NPY_FLOAT80, so if we come across it, assume it is * an Intel extended format.
*/ static npy_uint32 Dragon4_PrintFloat_Intel_extended80( Dragon4_Scratch *scratch, npy_float80 *value, Dragon4_Options *opt) { FloatVal128 val128; union { npy_float80 floatingPoint; struct { npy_uint64 a; npy_uint16 b; } integer; } buf80; buf80.floatingPoint = *value; /* Intel is little-endian */ val128.lo = buf80.integer.a; val128.hi = buf80.integer.b; return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); } #endif /* HAVE_LDOUBLE_INTEL_EXTENDED_10_BYTES_LE */ #ifdef HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE /* Intel's 80-bit IEEE extended precision format, 96-bit storage */ static npy_uint32 Dragon4_PrintFloat_Intel_extended96( Dragon4_Scratch *scratch, npy_float96 *value, Dragon4_Options *opt) { FloatVal128 val128; union { npy_float96 floatingPoint; struct { npy_uint64 a; npy_uint32 b; } integer; } buf96; buf96.floatingPoint = *value; /* Intel is little-endian */ val128.lo = buf96.integer.a; val128.hi = buf96.integer.b; return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); } #endif /* HAVE_LDOUBLE_INTEL_EXTENDED_12_BYTES_LE */ #ifdef HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE /* Motorola Big-endian equivalent of the Intel-extended 96 fp format */ static npy_uint32 Dragon4_PrintFloat_Motorola_extended96( Dragon4_Scratch *scratch, npy_float96 *value, Dragon4_Options *opt) { FloatVal128 val128; union { npy_float96 floatingPoint; struct { npy_uint64 a; npy_uint32 b; } integer; } buf96; buf96.floatingPoint = *value; /* Motorola is big-endian */ val128.lo = buf96.integer.b; val128.hi = buf96.integer.a >> 16; /* once again we assume the int has same endianness as the float */ return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); } #endif /* HAVE_LDOUBLE_MOTOROLA_EXTENDED_12_BYTES_BE */ #ifdef NPY_FLOAT128 typedef union FloatUnion128 { npy_float128 floatingPoint; struct { npy_uint64 a; npy_uint64 b; } integer; } FloatUnion128; #ifdef HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE /* Intel's 80-bit IEEE extended precision format, 128-bit storage */ static npy_uint32 Dragon4_PrintFloat_Intel_extended128( Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) { FloatVal128 val128; FloatUnion128 buf128; buf128.floatingPoint = *value; /* Intel is little-endian */ val128.lo = buf128.integer.a; val128.hi = buf128.integer.b; return Dragon4_PrintFloat_Intel_extended(scratch, val128, opt); } #endif /* HAVE_LDOUBLE_INTEL_EXTENDED_16_BYTES_LE */ #if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) || defined(HAVE_LDOUBLE_IEEE_QUAD_BE) /* * IEEE binary128 floating-point format * * sign: 1 bit * exponent: 15 bits * mantissa: 112 bits * * Currently binary128 format exists on only a few CPUs, such as on the POWER9 * arch or aarch64. Because of this, this code has not been extensively tested. * I am not sure if the arch also supports uint128, and C does not seem to * support int128 literals. So we use uint64 to do manipulation. 
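* In the two 64-bit words used below, the high word holds the sign bit, the 15 exponent bits and the top 48 bits of the mantissa; the low word holds the remaining 64 mantissa bits.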
*/ static npy_uint32 Dragon4_PrintFloat_IEEE_binary128( Dragon4_Scratch *scratch, FloatVal128 val128, Dragon4_Options *opt) { char *buffer = scratch->repr; const npy_uint32 bufferSize = sizeof(scratch->repr); BigInt *bigints = scratch->bigints; npy_uint32 floatExponent, floatSign; npy_uint64 mantissa_hi, mantissa_lo; npy_int32 exponent; npy_uint32 mantissaBit; npy_bool hasUnequalMargins; char signbit = '\0'; mantissa_hi = val128.hi & bitmask_u64(48); mantissa_lo = val128.lo; floatExponent = (val128.hi >> 48) & bitmask_u32(15); floatSign = val128.hi >> 63; /* output the sign */ if (floatSign != 0) { signbit = '-'; } else if (opt->sign) { signbit = '+'; } /* if this is a special value */ if (floatExponent == bitmask_u32(15)) { npy_uint64 mantissa_zero = mantissa_hi == 0 && mantissa_lo == 0; return PrintInfNan(buffer, bufferSize, !mantissa_zero, 16, signbit); } /* else this is a number */ /* factor the value into its parts */ if (floatExponent != 0) { /* * normal * The floating point equation is: * value = (1 + mantissa/2^112) * 2 ^ (exponent-16383) * We convert the integer equation by factoring a 2^112 out of the * exponent * value = (1 + mantissa/2^112) * 2^112 * 2 ^ (exponent-16383-112) * value = (2^112 + mantissa) * 2 ^ (exponent-16383-112) * Because of the implied 1 in front of the mantissa we have 113 bits of * precision. * m = (2^112 + mantissa) * e = (exponent-16383+1-113) * * Adding 2^112 to the mantissa is the same as adding 2^48 to the hi * 64 bit part. */ mantissa_hi = (1ull << 48) | mantissa_hi; /* mantissa_lo is unchanged */ exponent = floatExponent - 16383 - 112; mantissaBit = 112; hasUnequalMargins = (floatExponent != 1) && (mantissa_hi == 0 && mantissa_lo == 0); } else { /* * subnormal * The floating point equation is: * value = (mantissa/2^112) * 2 ^ (1-16383) * We convert the integer equation by factoring a 2^112 out of the * exponent * value = (mantissa/2^112) * 2^112 * 2 ^ (1-16383-112) * value = mantissa * 2 ^ (1-16383-112) * We have up to 112 bits of precision. * m = (mantissa) * e = (1-16383-112) */ exponent = 1 - 16383 - 112; mantissaBit = LogBase2_128(mantissa_hi, mantissa_lo); hasUnequalMargins = NPY_FALSE; } BigInt_Set_2x_uint64(&bigints[0], mantissa_hi, mantissa_lo); return Format_floatbits(buffer, bufferSize, bigints, exponent, signbit, mantissaBit, hasUnequalMargins, opt); } #if defined(HAVE_LDOUBLE_IEEE_QUAD_LE) static npy_uint32 Dragon4_PrintFloat_IEEE_binary128_le( Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) { FloatVal128 val128; FloatUnion128 buf128; buf128.floatingPoint = *value; val128.lo = buf128.integer.a; val128.hi = buf128.integer.b; return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); } #endif /* HAVE_LDOUBLE_IEEE_QUAD_LE */ #if defined(HAVE_LDOUBLE_IEEE_QUAD_BE) /* * This function is untested; very few, if any, architectures implement * big endian IEEE binary128 floating point.
*/ static npy_uint32 Dragon4_PrintFloat_IEEE_binary128_be( Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) { FloatVal128 val128; FloatUnion128 buf128; buf128.floatingPoint = *value; val128.lo = buf128.integer.b; val128.hi = buf128.integer.a; return Dragon4_PrintFloat_IEEE_binary128(scratch, val128, opt); } #endif /* HAVE_LDOUBLE_IEEE_QUAD_BE */ #endif /* HAVE_LDOUBLE_IEEE_QUAD_LE | HAVE_LDOUBLE_IEEE_QUAD_BE */ #if (defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE) || \ defined(HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE)) /* * IBM extended precision 128-bit floating-point format, aka IBM double-double * * IBM's double-double type is a pair of IEEE binary64 values, which you add * together to get a total value. The exponents are arranged so that the lower * double is about 2^52 times smaller than the high one, and the nearest * float64 value is simply the upper double, in which case the pair is * considered "normalized" (not to be confused with "normal" and "subnormal" * binary64 values). We assume normalized values. You can see that glibc's * printf on ppc does so too by constructing un-normalized values to get * strange behavior from the OS printf: * * >>> from numpy.core._multiarray_tests import format_float_OSprintf_g * >>> x = np.array([0.3,0.3], dtype='f8').view('f16')[0] * >>> format_float_OSprintf_g(x, 2) * 0.30 * >>> format_float_OSprintf_g(2*x, 2) * 1.20 * * If we don't assume normalization, x should really print as 0.6. * * For normalized values gcc assumes that the total mantissa is no * more than 106 bits (53+53), so we can drop bits from the second double which * would be pushed past 106 when left-shifting by its exponent, as happens * sometimes. (There has been debate about this, see * https://gcc.gnu.org/bugzilla/show_bug.cgi?format=multiple&id=70117, * https://sourceware.org/bugzilla/show_bug.cgi?id=22752 ) * * Note: This function is for the IBM-double-double which is a pair of IEEE * binary64 floats, like on ppc64 systems. This is *not* the hexadecimal * IBM-double-double type, which is a pair of IBM hexadecimal64 floats. * * See also: * https://gcc.gnu.org/wiki/Ieee128PowerPCA * https://www.ibm.com/support/knowledgecenter/en/ssw_aix_71/com.ibm.aix.genprogc/128bit_long_double_floating-point_datatype.htm */ static npy_uint32 Dragon4_PrintFloat_IBM_double_double( Dragon4_Scratch *scratch, npy_float128 *value, Dragon4_Options *opt) { char *buffer = scratch->repr; const npy_uint32 bufferSize = sizeof(scratch->repr); BigInt *bigints = scratch->bigints; FloatVal128 val128; FloatUnion128 buf128; npy_uint32 floatExponent1, floatExponent2; npy_uint64 floatMantissa1, floatMantissa2; npy_uint32 floatSign1, floatSign2; npy_uint64 mantissa1, mantissa2; npy_int32 exponent1, exponent2; int shift; npy_uint32 mantissaBit; npy_bool hasUnequalMargins; char signbit = '\0'; /* The high part always comes before the low part, regardless of the * endianness of the system.
*/ buf128.floatingPoint = *value; val128.hi = buf128.integer.a; val128.lo = buf128.integer.b; /* deconstruct the floating point values */ floatMantissa1 = val128.hi & bitmask_u64(52); floatExponent1 = (val128.hi >> 52) & bitmask_u32(11); floatSign1 = (val128.hi >> 63) != 0; floatMantissa2 = val128.lo & bitmask_u64(52); floatExponent2 = (val128.lo >> 52) & bitmask_u32(11); floatSign2 = (val128.lo >> 63) != 0; /* output the sign using 1st float's sign */ if (floatSign1) { signbit = '-'; } else if (opt->sign) { signbit = '+'; } /* we only need to look at the first float for inf/nan */ if (floatExponent1 == bitmask_u32(11)) { return PrintInfNan(buffer, bufferSize, floatMantissa1, 13, signbit); } /* else this is a number */ /* Factor the 1st value into its parts, see binary64 for comments. */ if (floatExponent1 == 0) { /* * If the first number is a subnormal value, the 2nd has to be 0 for * the float128 to be normalized, so we can ignore it. In this case * the float128 only has the precision of a single binary64 value. */ mantissa1 = floatMantissa1; exponent1 = 1 - 1023 - 52; mantissaBit = LogBase2_64(mantissa1); hasUnequalMargins = NPY_FALSE; BigInt_Set_uint64(&bigints[0], mantissa1); } else { mantissa1 = (1ull << 52) | floatMantissa1; exponent1 = floatExponent1 - 1023 - 52; mantissaBit = 52 + 53; /* * Computing hasUnequalMargins and mantissaBit: * This is a little trickier than for IEEE formats. * * When both doubles are "normal" it is clearer since we can think of * it as an IEEE type with a 106 bit mantissa. This value can never * have "unequal" margins because of the implied 1 bit in the 2nd * value. (unequal margins only happen when the mantissa has a value * like "10000000000...", all zeros except the implied bit at the * start, since the next lowest number has a different exponent). * mantissaBits will always be 52+53 in this case. * * If the 1st number is a very small normal, and the 2nd is subnormal * and not 2^52 times smaller, the number behaves like a subnormal * overall, where the upper number just adds some bits on the left. * Like usual subnormals, it has "equal" margins. The slightly tricky * thing is that the number of mantissaBits varies. It will be 52 * (from lower double) plus a variable number depending on the upper * number's exponent. We recompute the number of bits in the shift * calculation below, because the shift will be equal to the number of * lost bits. * * We can get unequal margins only if the first value has all-0 * mantissa (except implied bit), and the second value is exactly 0. As * a special exception the smallest normal value (smallest exponent, 0 * mantissa) should have equal margins, since it is "next to" a * subnormal value. */ /* factor the 2nd value into its parts */ if (floatExponent2 != 0) { mantissa2 = (1ull << 52) | floatMantissa2; exponent2 = floatExponent2 - 1023 - 52; hasUnequalMargins = NPY_FALSE; } else { /* shift exp by one so that leading mantissa bit is still bit 53 */ mantissa2 = floatMantissa2 << 1; exponent2 = - 1023 - 52; hasUnequalMargins = (floatExponent1 != 1) && (floatMantissa1 == 0) && (floatMantissa2 == 0); } /* * The 2nd val's exponent might not be exactly 52 smaller than the 1st, * it can vary a little bit. So do some shifting of the low mantissa, * so that the total mantissa is equivalent to bits 53 to 0 of the * first double immediately followed by bits 53 to 0 of the second. */ shift = exponent1 - exponent2 - 53; if (shift > 0) { /* shift more than 64 is undefined behavior */ mantissa2 = shift < 64 ? 
mantissa2 >> shift : 0; } else if (shift < 0) { /* * This only happens if the 2nd value is subnormal. * We expect that shift > -64, but check it anyway */ mantissa2 = -shift < 64 ? mantissa2 << -shift : 0; } /* * If the low double is a different sign from the high double, * rearrange so that the total mantissa is the sum of the two * mantissas, instead of a subtraction. * hi - lo -> (hi-1) + (1-lo), where lo < 1 */ if (floatSign1 != floatSign2 && mantissa2 != 0) { mantissa1--; mantissa2 = (1ull << 53) - mantissa2; } /* * Compute the number of bits if we are in the subnormal range. * The value "shift" happens to be exactly the number of lost bits. * Also, shift the bits so that the least significant bit is at * bit position 0, like a typical subnormal. After this exponent1 * should always be 2^-1022 */ if (shift < 0) { mantissa2 = (mantissa2 >> -shift) | (mantissa1 << (53 + shift)); mantissa1 = mantissa1 >> -shift; mantissaBit = mantissaBit -(-shift); exponent1 -= shift; DEBUG_ASSERT(exponent1 == -1022); } /* * set up the BigInt mantissa, by shifting the parts as needed * We can use | instead of + since the mantissas should not overlap */ BigInt_Set_2x_uint64(&bigints[0], mantissa1 >> 11, (mantissa1 << 53) | (mantissa2)); exponent1 = exponent1 - 53; } return Format_floatbits(buffer, bufferSize, bigints, exponent1, signbit, mantissaBit, hasUnequalMargins, opt); } #endif /* HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_LE | HAVE_LDOUBLE_IBM_DOUBLE_DOUBLE_BE */ #endif /* NPY_FLOAT128 */ /* * Here we define two Dragon4 entry functions for each type. One of them * accepts the args in a Dragon4_Options struct for convenience, the * other enumerates only the necessary parameters. * * Use a very large string buffer in case anyone tries to output a large number. * 16384 should be enough to exactly print the integer part of any float128, * which goes up to about 10^4932. The Dragon4_scratch struct provides a string * buffer of this size. 
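* (For scale: the largest finite binary128 value is about 1.19e4932, so its integer part has at most 4933 decimal digits, which fits comfortably.) */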
*/ #define make_dragon4_typefuncs_inner(Type, npy_type, format) \ \ PyObject *\ Dragon4_Positional_##Type##_opt(npy_type *val, Dragon4_Options *opt)\ {\ PyObject *ret;\ Dragon4_Scratch *scratch = get_dragon4_bigint_scratch();\ if (scratch == NULL) {\ return NULL;\ }\ if (Dragon4_PrintFloat_##format(scratch, val, opt) < 0) {\ free_dragon4_bigint_scratch(scratch);\ return NULL;\ }\ ret = PyUnicode_FromString(scratch->repr);\ free_dragon4_bigint_scratch(scratch);\ return ret;\ }\ \ PyObject *\ Dragon4_Positional_##Type(npy_type *val, DigitMode digit_mode,\ CutoffMode cutoff_mode, int precision, int min_digits, \ int sign, TrimMode trim, int pad_left, int pad_right)\ {\ Dragon4_Options opt;\ \ opt.scientific = 0;\ opt.digit_mode = digit_mode;\ opt.cutoff_mode = cutoff_mode;\ opt.precision = precision;\ opt.min_digits = min_digits;\ opt.sign = sign;\ opt.trim_mode = trim;\ opt.digits_left = pad_left;\ opt.digits_right = pad_right;\ opt.exp_digits = -1;\ \ return Dragon4_Positional_##Type##_opt(val, &opt);\ }\ \ PyObject *\ Dragon4_Scientific_##Type##_opt(npy_type *val, Dragon4_Options *opt)\ {\ PyObject *ret;\ Dragon4_Scratch *scratch = get_dragon4_bigint_scratch();\ if (scratch == NULL) {\ return NULL;\ }\ if (Dragon4_PrintFloat_##format(scratch, val, opt) < 0) {\ free_dragon4_bigint_scratch(scratch);\ return NULL;\ }\ ret = PyUnicode_FromString(scratch->repr);\ free_dragon4_bigint_scratch(scratch);\ return ret;\ }\ PyObject *\ Dragon4_Scientific_##Type(npy_type *val, DigitMode digit_mode, int precision,\ int min_digits, int sign, TrimMode trim, int pad_left, \ int exp_digits)\ {\ Dragon4_Options opt;\ \ opt.scientific = 1;\ opt.digit_mode = digit_mode;\ opt.cutoff_mode = CutoffMode_TotalLength;\ opt.precision = precision;\ opt.min_digits = min_digits;\ opt.sign = sign;\ opt.trim_mode = trim;\ opt.digits_left = pad_left;\ opt.digits_right = -1;\ opt.exp_digits = exp_digits;\ \ return Dragon4_Scientific_##Type##_opt(val, &opt);\ } #define make_dragon4_typefuncs(Type, npy_type, format) \ make_dragon4_typefuncs_inner(Type, npy_type, format) make_dragon4_typefuncs(Half, npy_half, NPY_HALF_BINFMT_NAME) make_dragon4_typefuncs(Float, npy_float, NPY_FLOAT_BINFMT_NAME) make_dragon4_typefuncs(Double, npy_double, NPY_DOUBLE_BINFMT_NAME) make_dragon4_typefuncs(LongDouble, npy_longdouble, NPY_LONGDOUBLE_BINFMT_NAME) #undef make_dragon4_typefuncs #undef make_dragon4_typefuncs_inner PyObject * Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int pad_right) { npy_double val; Dragon4_Options opt; opt.scientific = 0; opt.digit_mode = digit_mode; opt.cutoff_mode = cutoff_mode; opt.precision = precision; opt.min_digits = min_digits; opt.sign = sign; opt.trim_mode = trim; opt.digits_left = pad_left; opt.digits_right = pad_right; opt.exp_digits = -1; if (PyArray_IsScalar(obj, Half)) { npy_half x = PyArrayScalar_VAL(obj, Half); return Dragon4_Positional_Half_opt(&x, &opt); } else if (PyArray_IsScalar(obj, Float)) { npy_float x = PyArrayScalar_VAL(obj, Float); return Dragon4_Positional_Float_opt(&x, &opt); } else if (PyArray_IsScalar(obj, Double)) { npy_double x = PyArrayScalar_VAL(obj, Double); return Dragon4_Positional_Double_opt(&x, &opt); } else if (PyArray_IsScalar(obj, LongDouble)) { npy_longdouble x = PyArrayScalar_VAL(obj, LongDouble); return Dragon4_Positional_LongDouble_opt(&x, &opt); } val = PyFloat_AsDouble(obj); if (PyErr_Occurred()) { return NULL; } return Dragon4_Positional_Double_opt(&val, &opt); } 
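/*
 * Rough usage sketch for the generic positional entry point above (a sketch
 * only; error handling is elided and the commented result is what the author
 * expects, not a verified output):
 *
 *     PyObject *f = PyFloat_FromDouble(0.1);
 *     PyObject *s = Dragon4_Positional(f, DigitMode_Unique,
 *                       CutoffMode_TotalLength,
 *                       -1,                     // precision: no cutoff
 *                       -1,                     // min_digits
 *                       0,                      // sign: only '-' is printed
 *                       TrimMode_LeaveOneZero,
 *                       -1,                     // pad_left
 *                       -1);                    // pad_right
 *     // expected: s is the unicode string "0.1" (shortest round-trip repr)
 */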
PyObject * Dragon4_Scientific(PyObject *obj, DigitMode digit_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int exp_digits) { npy_double val; Dragon4_Options opt; opt.scientific = 1; opt.digit_mode = digit_mode; opt.cutoff_mode = CutoffMode_TotalLength; opt.precision = precision; opt.min_digits = min_digits; opt.sign = sign; opt.trim_mode = trim; opt.digits_left = pad_left; opt.digits_right = -1; opt.exp_digits = exp_digits; if (PyArray_IsScalar(obj, Half)) { npy_half x = PyArrayScalar_VAL(obj, Half); return Dragon4_Scientific_Half_opt(&x, &opt); } else if (PyArray_IsScalar(obj, Float)) { npy_float x = PyArrayScalar_VAL(obj, Float); return Dragon4_Scientific_Float_opt(&x, &opt); } else if (PyArray_IsScalar(obj, Double)) { npy_double x = PyArrayScalar_VAL(obj, Double); return Dragon4_Scientific_Double_opt(&x, &opt); } else if (PyArray_IsScalar(obj, LongDouble)) { npy_longdouble x = PyArrayScalar_VAL(obj, LongDouble); return Dragon4_Scientific_LongDouble_opt(&x, &opt); } val = PyFloat_AsDouble(obj); if (PyErr_Occurred()) { return NULL; } return Dragon4_Scientific_Double_opt(&val, &opt); } #undef DEBUG_ASSERT
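/*
 * Matching sketch for the scientific path, using one of the per-type entry
 * points generated by the macros above (again a sketch; the commented result
 * is the author's expectation, not a verified output):
 *
 *     npy_float x = 3.14159265f;
 *     PyObject *s = Dragon4_Scientific_Float(&x, DigitMode_Unique,
 *                       -1,                     // precision: shortest unique
 *                       -1,                     // min_digits
 *                       0,                      // sign
 *                       TrimMode_LeaveOneZero,
 *                       -1,                     // pad_left
 *                       -1);                    // exp_digits: default of 2
 *     // expected: s is the unicode string "3.1415927e+00"
 */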