path: root/numpy/core/src
author    Sayed Adel <seiko@imavr.com>    2023-02-28 09:01:48 +0200
committer Sayed Adel <seiko@imavr.com>    2023-04-05 08:59:03 +0200
commit    ea15a576a17dbadffbe2115dee3f40baca311bdd (patch)
tree      d648c889cf7538b1c9f90de8af781492ff83fce8 /numpy/core/src
parent    bba99dbd606d173a12c7de1867e2337de17c60e9 (diff)
download  numpy-ea15a576a17dbadffbe2115dee3f40baca311bdd.tar.gz
ENH: Extend the functionality of C++ type `np::Half`
- Optimize float/double conversions on x86; for now this requires raising the build's baseline features to at least `f16c`.
- Optimize float/double conversions on ppc64le; for now this requires raising the build's baseline features to at least `VSX3`.
- Bring `np::Half` to npymath.
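For context, a minimal sketch of the conversion path this patch accelerates (a hypothetical standalone caller; the include path is an assumption, not part of this patch):

    #include "common/half.hpp"  // np::Half

    // float -> binary16 -> float round trip. With `f16c` (x86) or `VSX3`
    // (ppc64le) in the build baseline, both directions compile to single
    // hardware conversion instructions; otherwise they fall back to the
    // bit-level emulation in half_private.hpp.
    inline float RoundTrip(float f)
    {
        np::Half h(f);                  // rounds ties-to-even
        return static_cast<float>(h);
    }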
Diffstat (limited to 'numpy/core/src')
-rw-r--r--   numpy/core/src/common/common.hpp          3
-rw-r--r--   numpy/core/src/common/float_status.hpp  134
-rw-r--r--   numpy/core/src/common/half.hpp          255
-rw-r--r--   numpy/core/src/common/half_private.hpp  330
-rw-r--r--   numpy/core/src/common/npdef.hpp          28
-rw-r--r--   numpy/core/src/common/npstd.hpp           2
-rw-r--r--   numpy/core/src/common/utils.hpp          51
-rw-r--r--   numpy/core/src/npymath/halffloat.c      555
-rw-r--r--   numpy/core/src/npymath/halffloat.cpp    238
9 files changed, 1014 insertions, 582 deletions
diff --git a/numpy/core/src/common/common.hpp b/numpy/core/src/common/common.hpp
index 47d790bcf..44ba449d8 100644
--- a/numpy/core/src/common/common.hpp
+++ b/numpy/core/src/common/common.hpp
@@ -4,8 +4,11 @@
* The following C++ headers are safe to be used standalone, however,
* they are gathered to make it easy for us and for the future need to support PCH.
*/
+#include "npdef.hpp"
+#include "utils.hpp"
#include "npstd.hpp"
#include "half.hpp"
#include "meta.hpp"
+#include "float_status.hpp"
#endif // NUMPY_CORE_SRC_COMMON_COMMON_HPP
diff --git a/numpy/core/src/common/float_status.hpp b/numpy/core/src/common/float_status.hpp
new file mode 100644
index 000000000..8e4d5e06a
--- /dev/null
+++ b/numpy/core/src/common/float_status.hpp
@@ -0,0 +1,134 @@
+#ifndef NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+#define NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
+#include "npstd.hpp"
+
+#include <fenv.h>
+
+namespace np {
+
+/// @addtogroup cpp_core_utility
+/// @{
+/**
+ * A class that wraps floating-point environment operations
+ * and provides lazy access to its functionality.
+ */
+class FloatStatus {
+ public:
+/*
+ * According to the C99 standard, FE_DIVBYZERO etc. may not be provided
+ * when unsupported. In such cases NumPy will not report these correctly,
+ * but we should still allow compiling (whether tests pass or not).
+ * By defining them as 0 locally, we make them no-ops. Unlike these defines,
+ * `musl`, for example, still defines all of the functions (as no-ops):
+ * https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
+ * and does similar replacement in its tests:
+ * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
+ */
+#ifdef FE_DIVBYZERO
+ static constexpr int kDivideByZero = FE_DIVBYZERO;
+#else
+ static constexpr int kDivideByZero = 0;
+#endif
+#ifdef FE_INVALID
+ static constexpr int kInvalid = FE_INVALID;
+#else
+ static constexpr int kInvalid = 0;
+#endif
+#ifdef FE_INEXACT
+ static constexpr int kInexact = FE_INEXACT;
+#else
+ static constexpr int kInexact = 0;
+#endif
+#ifdef FE_OVERFLOW
+ static constexpr int kOverflow = FE_OVERFLOW;
+#else
+ static constexpr int kOverflow = 0;
+#endif
+#ifdef FE_UNDERFLOW
+ static constexpr int kUnderflow = FE_UNDERFLOW;
+#else
+ static constexpr int kUnderflow = 0;
+#endif
+ static constexpr int kAllExcept = (kDivideByZero | kInvalid | kInexact |
+ kOverflow | kUnderflow);
+
+ FloatStatus(bool clear_on_dst=true)
+ : clear_on_dst_(clear_on_dst)
+ {
+ if constexpr (kAllExcept != 0) {
+ fpstatus_ = fetestexcept(kAllExcept);
+ }
+ else {
+ fpstatus_ = 0;
+ }
+ }
+ ~FloatStatus()
+ {
+ if constexpr (kAllExcept != 0) {
+ if (fpstatus_ != 0 && clear_on_dst_) {
+ feclearexcept(kAllExcept);
+ }
+ }
+ }
+ constexpr bool IsDivideByZero() const
+ {
+ return (fpstatus_ & kDivideByZero) != 0;
+ }
+ constexpr bool IsInexact() const
+ {
+ return (fpstatus_ & kInexact) != 0;
+ }
+ constexpr bool IsInvalid() const
+ {
+ return (fpstatus_ & kInvalid) != 0;
+ }
+ constexpr bool IsOverFlow() const
+ {
+ return (fpstatus_ & kOverflow) != 0;
+ }
+ constexpr bool IsUnderFlow() const
+ {
+ return (fpstatus_ & kUnderflow) != 0;
+ }
+ static void RaiseDivideByZero()
+ {
+ if constexpr (kDivideByZero != 0) {
+ feraiseexcept(kDivideByZero);
+ }
+ }
+ static void RaiseInexact()
+ {
+ if constexpr (kInexact != 0) {
+ feraiseexcept(kInexact);
+ }
+ }
+ static void RaiseInvalid()
+ {
+ if constexpr (kInvalid != 0) {
+ feraiseexcept(kInvalid);
+ }
+ }
+ static void RaiseOverflow()
+ {
+ if constexpr (kOverflow != 0) {
+ feraiseexcept(kOverflow);
+ }
+ }
+ static void RaiseUnderflow()
+ {
+ if constexpr (kUnderflow != 0) {
+ feraiseexcept(kUnderflow);
+ }
+ }
+
+ private:
+ bool clear_on_dst_;
+ int fpstatus_;
+};
+
+/// @} cpp_core_utility
+} // namespace np
+
+#endif // NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
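For illustration, a hedged sketch of how `FloatStatus` is intended to be used (hypothetical caller, not part of this patch): the constructor snapshots the currently raised FP exceptions, the accessors test that snapshot, and the destructor clears the environment when `clear_on_dst` is true.

    void Demo()
    {
        np::FloatStatus::RaiseDivideByZero(); // no-op if FE_DIVBYZERO is unsupported
        np::FloatStatus fstatus;              // snapshot the raised exceptions
        if (fstatus.IsDivideByZero()) {
            // ... report the error ...
        }
    } // destructor clears the environment (clear_on_dst defaults to true)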
diff --git a/numpy/core/src/common/half.hpp b/numpy/core/src/common/half.hpp
index 399f2fa79..e5f3f7a40 100644
--- a/numpy/core/src/common/half.hpp
+++ b/numpy/core/src/common/half.hpp
@@ -3,11 +3,14 @@
#include "npstd.hpp"
+#include "npy_cpu_dispatch.h" // NPY_HAVE_CPU_FEATURES
+#include "half_private.hpp"
+
// TODO(@seiko2plus):
// - covers half-precision operations that being supported by numpy/halffloat.h
-// - support __fp16
-// - optimize x86 half<->single via cpu_fp16
-// - optimize ppc64 half<->single via cpu_vsx3
+// - add support for arithmetic operations
+// - enabling __fp16 causes massive FP exceptions on aarch64;
+//   this needs deep investigation
namespace np {
@@ -16,48 +19,246 @@ namespace np {
/// Provides a type that implements 16-bit floating point (half-precision).
/// This type is ensured to be 16-bit size.
+#if 1 // ndef __ARM_FP16_FORMAT_IEEE
class Half final {
- public:
- /// @name Public Constructors
- /// @{
+ public:
+ /// Whether `Half` has a full native HW support.
+ static constexpr bool kNative = false;
+ /// Whether `Half` has a native HW support for single/double conversion.
+ template<typename T>
+ static constexpr bool kNativeConversion = (
+ (
+ std::is_same_v<T, float> &&
+ #if defined(NPY_HAVE_FP16) || defined(NPY_HAVE_VSX3)
+ true
+ #else
+ false
+ #endif
+ ) || (
+ std::is_same_v<T, double> &&
+ #if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3)
+ true
+ #else
+ false
+ #endif
+ )
+ );
/// Default constructor. initialize nothing.
Half() = default;
- /// Copy.
- Half(const Half &r)
+
+    /// Construct from float.
+    /// If no hardware optimization is available, rounding will always
+    /// be ties-to-even.
+ explicit Half(float f)
{
- data_.u = r.data_.u;
+ #if defined(NPY_HAVE_FP16)
+ __m128 mf = _mm_load_ss(&f);
+ bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(mf, _MM_FROUND_TO_NEAREST_INT)));
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+ __vector float vf32 = vec_splats(f);
+ __vector unsigned short vf16;
+ __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (vf32));
+ bits_ = vec_extract(vf16, 0);
+ #else
+ bits_ = half_private::FromFloatBits(BitCast<uint32_t>(f));
+ #endif
}
- /// @}
+    /// Construct from double.
+    /// If no hardware optimization is available, rounding will always
+    /// be ties-to-even.
+ explicit Half(double f)
+ {
+ #if defined(NPY_HAVE_AVX512FP16)
+ __m128d md = _mm_load_sd(&f);
+        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+ __vector double vf64 = vec_splats(f);
+ __vector unsigned short vf16;
+ __asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
+ bits_ = vec_extract(vf16, 0);
+ #else
+ bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
+ #endif
+ }
+
+ /// Cast to float
+ explicit operator float() const
+ {
+ #if defined(NPY_HAVE_FP16)
+ float ret;
+ _mm_store_ss(&ret, _mm_cvtph_ps(_mm_cvtsi32_si128(bits_)));
+ return ret;
+ #elif defined(NPY_HAVE_VSX3) && defined(vec_extract_fp_from_shorth)
+ return vec_extract(vec_extract_fp_from_shorth(vec_splats(bits_)), 0);
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+ __vector float vf32;
+ __asm__ __volatile__("xvcvhpsp %x0,%x1"
+ : "=wa"(vf32)
+                             : "wa"(vec_splats(bits_)));
+ return vec_extract(vf32, 0);
+ #else
+ return BitCast<float>(half_private::ToFloatBits(bits_));
+ #endif
+ }
+
+ /// Cast to double
+ explicit operator double() const
+ {
+ #if defined(NPY_HAVE_AVX512FP16)
+ double ret;
+ _mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
+ return ret;
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector double vf64;
+        __asm__ __volatile__("xvcvhpdp %x0,%x1"
+                             : "=wa"(vf64)
+                             : "wa"(vec_splats(bits_)));
+ return vec_extract(vf64, 0);
+ #else
+ return BitCast<double>(half_private::ToDoubleBits(bits_));
+ #endif
+ }
    /// Returns a new Half constructed from the IEEE 754 binary16.
- /// @param b the value of binary16.
- static Half FromBits(uint16_t b)
+ static constexpr Half FromBits(uint16_t bits)
{
- Half f;
- f.data_.u = b;
- return f;
+ Half h{};
+ h.bits_ = bits;
+ return h;
}
/// Returns the IEEE 754 binary16 representation.
- uint16_t Bits() const
+ constexpr uint16_t Bits() const
{
- return data_.u;
+ return bits_;
}
- private:
- union {
- uint16_t u;
-/*
-TODO(@seiko2plus): support __fp16
-#ifdef NPY_HAVE_HW_FP16
- __fp16 f;
-#endif
-*/
- } data_;
+    /// @name Comparison operators (ordered)
+ /// @{
+ constexpr bool operator==(Half r) const
+ {
+ return !(IsNaN() || r.IsNaN()) && Equal(r);
+ }
+ constexpr bool operator<(Half r) const
+ {
+ return !(IsNaN() || r.IsNaN()) && Less(r);
+ }
+ constexpr bool operator<=(Half r) const
+ {
+ return !(IsNaN() || r.IsNaN()) && LessEqual(r);
+ }
+ constexpr bool operator>(Half r) const
+ {
+ return r < *this;
+ }
+ constexpr bool operator>=(Half r) const
+ {
+ return r <= *this;
+ }
+ /// @}
+
+    /// @name Comparison operators (unordered)
+ /// @{
+ constexpr bool operator!=(Half r) const
+ {
+ return !(*this == r);
+ }
+ /// @} Comparison operators
+
+ /// @name Comparison with no guarantee of NaN behavior
+ /// @{
+ constexpr bool Less(Half r) const
+ {
+ uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+ b = static_cast<uint_fast16_t>(r.bits_);
+ bool sign_a = (a & 0x8000u) == 0x8000u;
+ bool sign_b = (b & 0x8000u) == 0x8000u;
+        // If both `a` and `b` have the same sign:
+        //   test `a` > `b` when `a` has the sign bit set,
+        //   or `a` < `b` when it does not,
+        //   and make sure they are not equal to each other,
+        //   in case both are equal to +-0.
+        // Otherwise:
+        //   test whether `a` has the sign bit set,
+        //   and `a` and `b` are not both zeros (-0.0 and +0.0).
+ return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
+ : sign_a && ((a | b) != 0x8000u);
+ }
+ constexpr bool LessEqual(Half r) const
+ {
+ uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+ b = static_cast<uint_fast16_t>(r.bits_);
+ bool sign_a = (a & 0x8000u) == 0x8000u;
+ bool sign_b = (b & 0x8000u) == 0x8000u;
+        // If both `a` and `b` have the same sign:
+        //   test `a` > `b` when `a` has the sign bit set,
+        //   or `a` < `b` when it does not,
+        //   or `a` == `b` (needed even if we used <= above,
+        //   since testing +-0 is still required).
+        // Otherwise:
+        //   test whether `a` has the sign bit set,
+        //   or whether `a` and `b` are both equal to +-0.0.
+ return (sign_a == sign_b) ? (sign_a ^ (a < b)) || (a == b)
+ : sign_a || ((a | b) == 0x8000u);
+ }
+ constexpr bool Equal(Half r) const
+ {
+        // A fast16 cast is not worth it, since an unpack operation would be involved.
+ uint16_t a = bits_, b = r.bits_;
+ return a == b || ((a | b) == 0x8000u);
+ }
+ /// @} Comparison
+
+ /// @name Properties
+    /// @{
+ constexpr bool IsNaN() const
+ {
+ return ((bits_ & 0x7c00u) == 0x7c00u) &&
+ ((bits_ & 0x03ffu) != 0);
+ }
+ /// @} Properties
+
+ private:
+ uint16_t bits_;
+};
+#else // __ARM_FP16_FORMAT_IEEE
+class Half final {
+ public:
+ static constexpr bool kNative = true;
+ template<typename T>
+ static constexpr bool kNativeConversion = (
+ std::is_same_v<T, float> || std::is_same_v<T, double>
+ );
+ Half() = default;
+ constexpr Half(__fp16 h) : half_(h)
+ {}
+ constexpr operator __fp16() const
+ { return half_; }
+ static Half FromBits(uint16_t bits)
+ {
+ Half h;
+ h.half_ = BitCast<__fp16>(bits);
+ return h;
+ }
+ uint16_t Bits() const
+ { return BitCast<uint16_t>(half_); }
+ constexpr bool Less(Half r) const
+ { return half_ < r.half_; }
+ constexpr bool LessEqual(Half r) const
+ { return half_ <= r.half_; }
+ constexpr bool Equal(Half r) const
+ { return half_ == r.half_; }
+ constexpr bool IsNaN() const
+ { return half_ != half_; }
+
+ private:
+ __fp16 half_;
};
+#endif // __ARM_FP16_FORMAT_IEEE
/// @} cpp_core_types
} // namespace np
+
#endif // NUMPY_CORE_SRC_COMMON_HALF_HPP
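A hedged usage sketch of the `Half` interface above (hand-picked values, not part of this patch):

    np::Half h(1.5f);                           // float -> half, ties-to-even
    np::Half inf = np::Half::FromBits(0x7c00u); // +inf from raw binary16 bits
    bool lt  = h < inf;                         // ordered: false if either is NaN
    bool nan = np::Half::FromBits(0x7e00u).IsNaN(); // a quiet-NaN bit pattern
    double d = static_cast<double>(h);          // half -> double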
diff --git a/numpy/core/src/common/half_private.hpp b/numpy/core/src/common/half_private.hpp
new file mode 100644
index 000000000..7a64eb397
--- /dev/null
+++ b/numpy/core/src/common/half_private.hpp
@@ -0,0 +1,330 @@
+#ifndef NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+#define NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+
+#include "npstd.hpp"
+#include "float_status.hpp"
+
+/*
+ * The following functions, which emulate float/double/half conversions,
+ * are copied from npymath without any changes to their functionality.
+ */
+namespace np { namespace half_private {
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromFloatBits(uint32_t f)
+{
+ uint32_t f_exp, f_sig;
+ uint16_t h_sgn, h_exp, h_sig;
+
+ h_sgn = (uint16_t) ((f&0x80000000u) >> 16);
+ f_exp = (f&0x7f800000u);
+
+ /* Exponent overflow/NaN converts to signed inf/NaN */
+ if (f_exp >= 0x47800000u) {
+ if (f_exp == 0x7f800000u) {
+ /* Inf or NaN */
+ f_sig = (f&0x007fffffu);
+ if (f_sig != 0) {
+ /* NaN - propagate the flag in the significand... */
+ uint16_t ret = (uint16_t) (0x7c00u + (f_sig >> 13));
+ /* ...but make sure it stays a NaN */
+ if (ret == 0x7c00u) {
+ ret++;
+ }
+ return h_sgn + ret;
+ } else {
+ /* signed inf */
+ return (uint16_t) (h_sgn + 0x7c00u);
+ }
+ } else {
+ if constexpr (gen_overflow) {
+ /* overflow to signed inf */
+ FloatStatus::RaiseOverflow();
+ }
+ return (uint16_t) (h_sgn + 0x7c00u);
+ }
+ }
+
+ /* Exponent underflow converts to a subnormal half or signed zero */
+ if (f_exp <= 0x38000000u) {
+ /*
+ * Signed zeros, subnormal floats, and floats with small
+ * exponents all convert to signed zero half-floats.
+ */
+ if (f_exp < 0x33000000u) {
+ if constexpr (gen_underflow) {
+ /* If f != 0, it underflowed to 0 */
+ if ((f&0x7fffffff) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ return h_sgn;
+ }
+ /* Make the subnormal significand */
+ f_exp >>= 23;
+ f_sig = (0x00800000u + (f&0x007fffffu));
+ if constexpr (gen_underflow) {
+ /* If it's not exactly represented, it underflowed */
+ if ((f_sig&(((uint32_t)1 << (126 - f_exp)) - 1)) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ /*
+ * Usually the significand is shifted by 13. For subnormals an
+ * additional shift needs to occur. This shift is one for the largest
+ * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
+ * offsets the new first bit. At most the shift can be 1+10 bits.
+ */
+ f_sig >>= (113 - f_exp);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. However, the (113 - f_exp)
+ * shift can lose up to 11 bits, so the || checks them in the original.
+ * In all other cases, we can just add one.
+ */
+ if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
+ f_sig += 0x00001000u;
+ }
+ }
+ else {
+ f_sig += 0x00001000u;
+ }
+ h_sig = (uint16_t) (f_sig >> 13);
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp from zero to one and h_sig will be zero.
+ * This is the correct result.
+ */
+ return (uint16_t) (h_sgn + h_sig);
+ }
+
+ /* Regular case with no overflow or underflow */
+ h_exp = (uint16_t) ((f_exp - 0x38000000u) >> 13);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ f_sig = (f&0x007fffffu);
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. In all other cases, we do.
+ */
+ if ((f_sig&0x00003fffu) != 0x00001000u) {
+ f_sig += 0x00001000u;
+ }
+ }
+ else {
+ f_sig += 0x00001000u;
+ }
+ h_sig = (uint16_t) (f_sig >> 13);
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp by one and h_sig will be zero. This is the
+ * correct result. h_exp may increment to 15, at greatest, in
+ * which case the result overflows to a signed inf.
+ */
+ if constexpr (gen_overflow) {
+ h_sig += h_exp;
+ if (h_sig == 0x7c00u) {
+ FloatStatus::RaiseOverflow();
+ }
+ return h_sgn + h_sig;
+ }
+ else {
+ return h_sgn + h_exp + h_sig;
+ }
+}
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromDoubleBits(uint64_t d)
+{
+ uint64_t d_exp, d_sig;
+ uint16_t h_sgn, h_exp, h_sig;
+
+ h_sgn = (d&0x8000000000000000ULL) >> 48;
+ d_exp = (d&0x7ff0000000000000ULL);
+
+ /* Exponent overflow/NaN converts to signed inf/NaN */
+ if (d_exp >= 0x40f0000000000000ULL) {
+ if (d_exp == 0x7ff0000000000000ULL) {
+ /* Inf or NaN */
+ d_sig = (d&0x000fffffffffffffULL);
+ if (d_sig != 0) {
+ /* NaN - propagate the flag in the significand... */
+ uint16_t ret = (uint16_t) (0x7c00u + (d_sig >> 42));
+ /* ...but make sure it stays a NaN */
+ if (ret == 0x7c00u) {
+ ret++;
+ }
+ return h_sgn + ret;
+ } else {
+ /* signed inf */
+ return h_sgn + 0x7c00u;
+ }
+ } else {
+ /* overflow to signed inf */
+ if constexpr (gen_overflow) {
+ FloatStatus::RaiseOverflow();
+ }
+ return h_sgn + 0x7c00u;
+ }
+ }
+
+ /* Exponent underflow converts to subnormal half or signed zero */
+ if (d_exp <= 0x3f00000000000000ULL) {
+ /*
+ * Signed zeros, subnormal floats, and floats with small
+ * exponents all convert to signed zero half-floats.
+ */
+ if (d_exp < 0x3e60000000000000ULL) {
+ if constexpr (gen_underflow) {
+ /* If d != 0, it underflowed to 0 */
+ if ((d&0x7fffffffffffffffULL) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ return h_sgn;
+ }
+ /* Make the subnormal significand */
+ d_exp >>= 52;
+ d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
+ if constexpr (gen_underflow) {
+ /* If it's not exactly represented, it underflowed */
+ if ((d_sig&(((uint64_t)1 << (1051 - d_exp)) - 1)) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ /*
+ * Unlike floats, doubles have enough room to shift left to align
+ * the subnormal significand leading to no loss of the last bits.
+ * The smallest possible exponent giving a subnormal is:
+ * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
+         * shifted with respect to it. This adds a shift of 10+1 bits to the
+         * final right shift when comparing it to the one in the normal branch.
+ */
+ assert(d_exp - 998 >= 0);
+ d_sig <<= (d_exp - 998);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. In all other cases, we do.
+ */
+ if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
+ d_sig += 0x0010000000000000ULL;
+ }
+ }
+ else {
+ d_sig += 0x0010000000000000ULL;
+ }
+ h_sig = (uint16_t) (d_sig >> 53);
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp from zero to one and h_sig will be zero.
+ * This is the correct result.
+ */
+ return h_sgn + h_sig;
+ }
+
+ /* Regular case with no overflow or underflow */
+ h_exp = (uint16_t) ((d_exp - 0x3f00000000000000ULL) >> 42);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ d_sig = (d&0x000fffffffffffffULL);
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. In all other cases, we do.
+ */
+ if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
+ d_sig += 0x0000020000000000ULL;
+ }
+ }
+ else {
+ d_sig += 0x0000020000000000ULL;
+ }
+ h_sig = (uint16_t) (d_sig >> 42);
+
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp by one and h_sig will be zero. This is the
+ * correct result. h_exp may increment to 15, at greatest, in
+ * which case the result overflows to a signed inf.
+ */
+ if constexpr (gen_overflow) {
+ h_sig += h_exp;
+ if (h_sig == 0x7c00u) {
+ FloatStatus::RaiseOverflow();
+ }
+ return h_sgn + h_sig;
+ }
+ else {
+ return h_sgn + h_exp + h_sig;
+ }
+}
+
+constexpr uint32_t ToFloatBits(uint16_t h)
+{
+ uint16_t h_exp = (h&0x7c00u);
+ uint32_t f_sgn = ((uint32_t)h&0x8000u) << 16;
+ switch (h_exp) {
+ case 0x0000u: { // 0 or subnormal
+ uint16_t h_sig = (h&0x03ffu);
+ // Signed zero
+ if (h_sig == 0) {
+ return f_sgn;
+ }
+ // Subnormal
+ h_sig <<= 1;
+ while ((h_sig&0x0400u) == 0) {
+ h_sig <<= 1;
+ h_exp++;
+ }
+ uint32_t f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23;
+ uint32_t f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13;
+ return f_sgn + f_exp + f_sig;
+ }
+ case 0x7c00u: // inf or NaN
+ // All-ones exponent and a copy of the significand
+ return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13);
+ default: // normalized
+ // Just need to adjust the exponent and shift
+ return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13);
+ }
+}
+
+constexpr uint64_t ToDoubleBits(uint16_t h)
+{
+ uint16_t h_exp = (h&0x7c00u);
+ uint64_t d_sgn = ((uint64_t)h&0x8000u) << 48;
+ switch (h_exp) {
+ case 0x0000u: { // 0 or subnormal
+ uint16_t h_sig = (h&0x03ffu);
+ // Signed zero
+ if (h_sig == 0) {
+ return d_sgn;
+ }
+ // Subnormal
+ h_sig <<= 1;
+ while ((h_sig&0x0400u) == 0) {
+ h_sig <<= 1;
+ h_exp++;
+ }
+ uint64_t d_exp = ((uint64_t)(1023 - 15 - h_exp)) << 52;
+ uint64_t d_sig = ((uint64_t)(h_sig&0x03ffu)) << 42;
+ return d_sgn + d_exp + d_sig;
+ }
+ case 0x7c00u: // inf or NaN
+ // All-ones exponent and a copy of the significand
+ return d_sgn + 0x7ff0000000000000ULL + (((uint64_t)(h&0x03ffu)) << 42);
+ default: // normalized
+ // Just need to adjust the exponent and shift
+ return d_sgn + (((uint64_t)(h&0x7fffu) + 0xfc000u) << 42);
+ }
+}
+
+}} // namespace np::half_private
+#endif // NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
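As a hedged illustration of the emulation entry points (hand-picked bit patterns; the defaulted template parameters enable FP-exception generation and ties-to-even rounding):

    using namespace np;
    // 1.0f is 0x3f800000 in binary32 and 0x3c00 in binary16.
    static_assert(half_private::ToFloatBits(0x3c00u) == 0x3f800000u, "");
    // Callers can opt out of overflow/underflow exception generation
    // through the first two template parameters:
    uint16_t bits = half_private::FromFloatBits<false, false>(0x3f800000u);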
diff --git a/numpy/core/src/common/npdef.hpp b/numpy/core/src/common/npdef.hpp
new file mode 100644
index 000000000..56a0df52e
--- /dev/null
+++ b/numpy/core/src/common/npdef.hpp
@@ -0,0 +1,28 @@
+#ifndef NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+#define NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
+#if !defined(__cplusplus) || __cplusplus < 201703L
+ #error "NumPy requires a compiler with at least C++17 enabled"
+#endif
+
+/// @addtogroup cpp_core_defs
+/// @{
+
+/// Whether compiler supports C++20
+#if __cplusplus >= 202002L
+ #define NP_HAS_CPP20 1
+#else
+ #define NP_HAS_CPP20 0
+#endif
+
+/// Wraps `__has_builtin`
+#if defined(__has_builtin)
+ #define NP_HAS_BUILTIN(INTRIN) __has_builtin(INTRIN)
+#else
+ #define NP_HAS_BUILTIN(INTRIN) 0
+#endif
+
+/// @} cpp_core_defs
+
+#endif // NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
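A hedged example of the feature-test wrappers above (`NP_LIKELY` is a hypothetical macro used only for illustration):

    #if NP_HAS_BUILTIN(__builtin_expect)
        #define NP_LIKELY(X) __builtin_expect(!!(X), 1)
    #else
        #define NP_LIKELY(X) (X)
    #endif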
diff --git a/numpy/core/src/common/npstd.hpp b/numpy/core/src/common/npstd.hpp
index 71993bd7c..ca664229a 100644
--- a/numpy/core/src/common/npstd.hpp
+++ b/numpy/core/src/common/npstd.hpp
@@ -31,6 +31,8 @@ using std::int64_t;
using std::uintptr_t;
using std::intptr_t;
using std::complex;
+using std::uint_fast16_t;
+using std::uint_fast32_t;
/** Guard for long double.
*
diff --git a/numpy/core/src/common/utils.hpp b/numpy/core/src/common/utils.hpp
new file mode 100644
index 000000000..f847cab44
--- /dev/null
+++ b/numpy/core/src/common/utils.hpp
@@ -0,0 +1,51 @@
+#ifndef NUMPY_CORE_SRC_COMMON_UTILS_HPP
+#define NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
+#include "npdef.hpp"
+
+#if NP_HAS_CPP20
+ #include <bit>
+#endif
+
+#include <type_traits>
+#include <string.h>
+
+namespace np {
+
+/** Create a value of type `To` from the bits of `from`.
+ *
+ * Similar to `std::bit_cast`, but compatible with C++17;
+ * it performs like `*reinterpret_cast<To*>(&from)` or type punning,
+ * without invoking any undefined behavior.
+ */
+template<typename To, typename From>
+#if NP_HAS_BUILTIN(__builtin_bit_cast) || NP_HAS_CPP20
+[[nodiscard]] constexpr
+#else
+inline
+#endif
+To BitCast(const From &from) noexcept
+{
+ static_assert(
+ sizeof(To) == sizeof(From),
+ "both data types must have the same size");
+
+ static_assert(
+ std::is_trivially_copyable_v<To> &&
+ std::is_trivially_copyable_v<From>,
+ "both data types must be trivially copyable");
+
+#if NP_HAS_CPP20
+ return std::bit_cast<To>(from);
+#elif NP_HAS_BUILTIN(__builtin_bit_cast)
+ return __builtin_bit_cast(To, from);
+#else
+ To to;
+ memcpy(&to, &from, sizeof(from));
+ return to;
+#endif
+}
+
+} // namespace np
+#endif // NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
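A hedged sketch of `BitCast` in action (hypothetical values): it resolves to `std::bit_cast`, `__builtin_bit_cast`, or `memcpy` at compile time, all of which avoid the undefined behavior of a plain `reinterpret_cast` pun:

    float f = 1.0f;
    auto bits  = np::BitCast<uint32_t>(f);  // 0x3f800000
    float back = np::BitCast<float>(bits);  // exact round trip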
diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c
deleted file mode 100644
index 51948c736..000000000
--- a/numpy/core/src/npymath/halffloat.c
+++ /dev/null
@@ -1,555 +0,0 @@
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "numpy/halffloat.h"
-
-/*
- * This chooses between 'ties to even' and 'ties away from zero'.
- */
-#define NPY_HALF_ROUND_TIES_TO_EVEN 1
-/*
- * If these are 1, the conversions try to trigger underflow,
- * overflow, and invalid exceptions in the FP system when needed.
- */
-#define NPY_HALF_GENERATE_OVERFLOW 1
-#define NPY_HALF_GENERATE_UNDERFLOW 1
-#define NPY_HALF_GENERATE_INVALID 1
-
-/*
- ********************************************************************
- * HALF-PRECISION ROUTINES *
- ********************************************************************
- */
-
-float npy_half_to_float(npy_half h)
-{
- union { float ret; npy_uint32 retbits; } conv;
- conv.retbits = npy_halfbits_to_floatbits(h);
- return conv.ret;
-}
-
-double npy_half_to_double(npy_half h)
-{
- union { double ret; npy_uint64 retbits; } conv;
- conv.retbits = npy_halfbits_to_doublebits(h);
- return conv.ret;
-}
-
-npy_half npy_float_to_half(float f)
-{
- union { float f; npy_uint32 fbits; } conv;
- conv.f = f;
- return npy_floatbits_to_halfbits(conv.fbits);
-}
-
-npy_half npy_double_to_half(double d)
-{
- union { double d; npy_uint64 dbits; } conv;
- conv.d = d;
- return npy_doublebits_to_halfbits(conv.dbits);
-}
-
-int npy_half_iszero(npy_half h)
-{
- return (h&0x7fff) == 0;
-}
-
-int npy_half_isnan(npy_half h)
-{
- return ((h&0x7c00u) == 0x7c00u) && ((h&0x03ffu) != 0x0000u);
-}
-
-int npy_half_isinf(npy_half h)
-{
- return ((h&0x7fffu) == 0x7c00u);
-}
-
-int npy_half_isfinite(npy_half h)
-{
- return ((h&0x7c00u) != 0x7c00u);
-}
-
-int npy_half_signbit(npy_half h)
-{
- return (h&0x8000u) != 0;
-}
-
-npy_half npy_half_spacing(npy_half h)
-{
- npy_half ret;
- npy_uint16 h_exp = h&0x7c00u;
- npy_uint16 h_sig = h&0x03ffu;
- if (h_exp == 0x7c00u) {
-#if NPY_HALF_GENERATE_INVALID
- npy_set_floatstatus_invalid();
-#endif
- ret = NPY_HALF_NAN;
- } else if (h == 0x7bffu) {
-#if NPY_HALF_GENERATE_OVERFLOW
- npy_set_floatstatus_overflow();
-#endif
- ret = NPY_HALF_PINF;
- } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
- if (h_exp > 0x2c00u) { /* If result is normalized */
- ret = h_exp - 0x2c00u;
- } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
- ret = 1 << ((h_exp >> 10) - 2);
- } else {
- ret = 0x0001u; /* Smallest subnormal half */
- }
- } else if (h_exp > 0x2800u) { /* If result is still normalized */
- ret = h_exp - 0x2800u;
- } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
- ret = 1 << ((h_exp >> 10) - 1);
- } else {
- ret = 0x0001u;
- }
-
- return ret;
-}
-
-npy_half npy_half_copysign(npy_half x, npy_half y)
-{
- return (x&0x7fffu) | (y&0x8000u);
-}
-
-npy_half npy_half_nextafter(npy_half x, npy_half y)
-{
- npy_half ret;
-
- if (npy_half_isnan(x) || npy_half_isnan(y)) {
- ret = NPY_HALF_NAN;
- } else if (npy_half_eq_nonan(x, y)) {
- ret = x;
- } else if (npy_half_iszero(x)) {
- ret = (y&0x8000u) + 1; /* Smallest subnormal half */
- } else if (!(x&0x8000u)) { /* x > 0 */
- if ((npy_int16)x > (npy_int16)y) { /* x > y */
- ret = x-1;
- } else {
- ret = x+1;
- }
- } else {
- if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
- ret = x-1;
- } else {
- ret = x+1;
- }
- }
-#if NPY_HALF_GENERATE_OVERFLOW
- if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
- npy_set_floatstatus_overflow();
- }
-#endif
-
- return ret;
-}
-
-int npy_half_eq_nonan(npy_half h1, npy_half h2)
-{
- return (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_eq(npy_half h1, npy_half h2)
-{
- /*
- * The equality cases are as follows:
- * - If either value is NaN, never equal.
- * - If the values are equal, equal.
- * - If the values are both signed zeros, equal.
- */
- return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) &&
- (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_ne(npy_half h1, npy_half h2)
-{
- return !npy_half_eq(h1, h2);
-}
-
-int npy_half_lt_nonan(npy_half h1, npy_half h2)
-{
- if (h1&0x8000u) {
- if (h2&0x8000u) {
- return (h1&0x7fffu) > (h2&0x7fffu);
- } else {
- /* Signed zeros are equal, have to check for it */
- return (h1 != 0x8000u) || (h2 != 0x0000u);
- }
- } else {
- if (h2&0x8000u) {
- return 0;
- } else {
- return (h1&0x7fffu) < (h2&0x7fffu);
- }
- }
-}
-
-int npy_half_lt(npy_half h1, npy_half h2)
-{
- return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_lt_nonan(h1, h2);
-}
-
-int npy_half_gt(npy_half h1, npy_half h2)
-{
- return npy_half_lt(h2, h1);
-}
-
-int npy_half_le_nonan(npy_half h1, npy_half h2)
-{
- if (h1&0x8000u) {
- if (h2&0x8000u) {
- return (h1&0x7fffu) >= (h2&0x7fffu);
- } else {
- return 1;
- }
- } else {
- if (h2&0x8000u) {
- /* Signed zeros are equal, have to check for it */
- return (h1 == 0x0000u) && (h2 == 0x8000u);
- } else {
- return (h1&0x7fffu) <= (h2&0x7fffu);
- }
- }
-}
-
-int npy_half_le(npy_half h1, npy_half h2)
-{
- return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_le_nonan(h1, h2);
-}
-
-int npy_half_ge(npy_half h1, npy_half h2)
-{
- return npy_half_le(h2, h1);
-}
-
-npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
-{
- float fh1 = npy_half_to_float(h1);
- float fh2 = npy_half_to_float(h2);
- float div, mod;
-
- div = npy_divmodf(fh1, fh2, &mod);
- *modulus = npy_float_to_half(mod);
- return npy_float_to_half(div);
-}
-
-
-
-/*
- ********************************************************************
- * BIT-LEVEL CONVERSIONS *
- ********************************************************************
- */
-
-npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
-{
- npy_uint32 f_exp, f_sig;
- npy_uint16 h_sgn, h_exp, h_sig;
-
- h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
- f_exp = (f&0x7f800000u);
-
- /* Exponent overflow/NaN converts to signed inf/NaN */
- if (f_exp >= 0x47800000u) {
- if (f_exp == 0x7f800000u) {
- /* Inf or NaN */
- f_sig = (f&0x007fffffu);
- if (f_sig != 0) {
- /* NaN - propagate the flag in the significand... */
- npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
- /* ...but make sure it stays a NaN */
- if (ret == 0x7c00u) {
- ret++;
- }
- return h_sgn + ret;
- } else {
- /* signed inf */
- return (npy_uint16) (h_sgn + 0x7c00u);
- }
- } else {
- /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
- npy_set_floatstatus_overflow();
-#endif
- return (npy_uint16) (h_sgn + 0x7c00u);
- }
- }
-
- /* Exponent underflow converts to a subnormal half or signed zero */
- if (f_exp <= 0x38000000u) {
- /*
- * Signed zeros, subnormal floats, and floats with small
- * exponents all convert to signed zero half-floats.
- */
- if (f_exp < 0x33000000u) {
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If f != 0, it underflowed to 0 */
- if ((f&0x7fffffff) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- return h_sgn;
- }
- /* Make the subnormal significand */
- f_exp >>= 23;
- f_sig = (0x00800000u + (f&0x007fffffu));
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If it's not exactly represented, it underflowed */
- if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- /*
- * Usually the significand is shifted by 13. For subnormals an
- * additional shift needs to occur. This shift is one for the largest
- * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
- * offsets the new first bit. At most the shift can be 1+10 bits.
- */
- f_sig >>= (113 - f_exp);
- /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. However, the (113 - f_exp)
- * shift can lose up to 11 bits, so the || checks them in the original.
- * In all other cases, we can just add one.
- */
- if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
- f_sig += 0x00001000u;
- }
-#else
- f_sig += 0x00001000u;
-#endif
- h_sig = (npy_uint16) (f_sig >> 13);
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp from zero to one and h_sig will be zero.
- * This is the correct result.
- */
- return (npy_uint16) (h_sgn + h_sig);
- }
-
- /* Regular case with no overflow or underflow */
- h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
- /* Handle rounding by adding 1 to the bit beyond half precision */
- f_sig = (f&0x007fffffu);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. In all other cases, we do.
- */
- if ((f_sig&0x00003fffu) != 0x00001000u) {
- f_sig += 0x00001000u;
- }
-#else
- f_sig += 0x00001000u;
-#endif
- h_sig = (npy_uint16) (f_sig >> 13);
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp by one and h_sig will be zero. This is the
- * correct result. h_exp may increment to 15, at greatest, in
- * which case the result overflows to a signed inf.
- */
-#if NPY_HALF_GENERATE_OVERFLOW
- h_sig += h_exp;
- if (h_sig == 0x7c00u) {
- npy_set_floatstatus_overflow();
- }
- return h_sgn + h_sig;
-#else
- return h_sgn + h_exp + h_sig;
-#endif
-}
-
-npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
-{
- npy_uint64 d_exp, d_sig;
- npy_uint16 h_sgn, h_exp, h_sig;
-
- h_sgn = (d&0x8000000000000000ULL) >> 48;
- d_exp = (d&0x7ff0000000000000ULL);
-
- /* Exponent overflow/NaN converts to signed inf/NaN */
- if (d_exp >= 0x40f0000000000000ULL) {
- if (d_exp == 0x7ff0000000000000ULL) {
- /* Inf or NaN */
- d_sig = (d&0x000fffffffffffffULL);
- if (d_sig != 0) {
- /* NaN - propagate the flag in the significand... */
- npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42));
- /* ...but make sure it stays a NaN */
- if (ret == 0x7c00u) {
- ret++;
- }
- return h_sgn + ret;
- } else {
- /* signed inf */
- return h_sgn + 0x7c00u;
- }
- } else {
- /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
- npy_set_floatstatus_overflow();
-#endif
- return h_sgn + 0x7c00u;
- }
- }
-
- /* Exponent underflow converts to subnormal half or signed zero */
- if (d_exp <= 0x3f00000000000000ULL) {
- /*
- * Signed zeros, subnormal floats, and floats with small
- * exponents all convert to signed zero half-floats.
- */
- if (d_exp < 0x3e60000000000000ULL) {
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If d != 0, it underflowed to 0 */
- if ((d&0x7fffffffffffffffULL) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- return h_sgn;
- }
- /* Make the subnormal significand */
- d_exp >>= 52;
- d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If it's not exactly represented, it underflowed */
- if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- /*
- * Unlike floats, doubles have enough room to shift left to align
- * the subnormal significand leading to no loss of the last bits.
- * The smallest possible exponent giving a subnormal is:
- * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
- * shifted with respect to it. This adds a shift of 10+1 bits the final
- * right shift when comparing it to the one in the normal branch.
- */
- assert(d_exp - 998 >= 0);
- d_sig <<= (d_exp - 998);
- /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. In all other cases, we do.
- */
- if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
- d_sig += 0x0010000000000000ULL;
- }
-#else
- d_sig += 0x0010000000000000ULL;
-#endif
- h_sig = (npy_uint16) (d_sig >> 53);
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp from zero to one and h_sig will be zero.
- * This is the correct result.
- */
- return h_sgn + h_sig;
- }
-
- /* Regular case with no overflow or underflow */
- h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42);
- /* Handle rounding by adding 1 to the bit beyond half precision */
- d_sig = (d&0x000fffffffffffffULL);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. In all other cases, we do.
- */
- if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
- d_sig += 0x0000020000000000ULL;
- }
-#else
- d_sig += 0x0000020000000000ULL;
-#endif
- h_sig = (npy_uint16) (d_sig >> 42);
-
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp by one and h_sig will be zero. This is the
- * correct result. h_exp may increment to 15, at greatest, in
- * which case the result overflows to a signed inf.
- */
-#if NPY_HALF_GENERATE_OVERFLOW
- h_sig += h_exp;
- if (h_sig == 0x7c00u) {
- npy_set_floatstatus_overflow();
- }
- return h_sgn + h_sig;
-#else
- return h_sgn + h_exp + h_sig;
-#endif
-}
-
-npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
-{
- npy_uint16 h_exp, h_sig;
- npy_uint32 f_sgn, f_exp, f_sig;
-
- h_exp = (h&0x7c00u);
- f_sgn = ((npy_uint32)h&0x8000u) << 16;
- switch (h_exp) {
- case 0x0000u: /* 0 or subnormal */
- h_sig = (h&0x03ffu);
- /* Signed zero */
- if (h_sig == 0) {
- return f_sgn;
- }
- /* Subnormal */
- h_sig <<= 1;
- while ((h_sig&0x0400u) == 0) {
- h_sig <<= 1;
- h_exp++;
- }
- f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23;
- f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13;
- return f_sgn + f_exp + f_sig;
- case 0x7c00u: /* inf or NaN */
- /* All-ones exponent and a copy of the significand */
- return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13);
- default: /* normalized */
- /* Just need to adjust the exponent and shift */
- return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13);
- }
-}
-
-npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
-{
- npy_uint16 h_exp, h_sig;
- npy_uint64 d_sgn, d_exp, d_sig;
-
- h_exp = (h&0x7c00u);
- d_sgn = ((npy_uint64)h&0x8000u) << 48;
- switch (h_exp) {
- case 0x0000u: /* 0 or subnormal */
- h_sig = (h&0x03ffu);
- /* Signed zero */
- if (h_sig == 0) {
- return d_sgn;
- }
- /* Subnormal */
- h_sig <<= 1;
- while ((h_sig&0x0400u) == 0) {
- h_sig <<= 1;
- h_exp++;
- }
- d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52;
- d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42;
- return d_sgn + d_exp + d_sig;
- case 0x7c00u: /* inf or NaN */
- /* All-ones exponent and a copy of the significand */
- return d_sgn + 0x7ff0000000000000ULL +
- (((npy_uint64)(h&0x03ffu)) << 42);
- default: /* normalized */
- /* Just need to adjust the exponent and shift */
- return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42);
- }
-}
diff --git a/numpy/core/src/npymath/halffloat.cpp b/numpy/core/src/npymath/halffloat.cpp
new file mode 100644
index 000000000..aa582c1b9
--- /dev/null
+++ b/numpy/core/src/npymath/halffloat.cpp
@@ -0,0 +1,238 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+/*
+ * If these are 1, the conversions try to trigger underflow,
+ * overflow, and invalid exceptions in the FP system when needed.
+ */
+#define NPY_HALF_GENERATE_OVERFLOW 1
+#define NPY_HALF_GENERATE_INVALID 1
+
+#include "numpy/halffloat.h"
+
+#include "common.hpp"
+/*
+ ********************************************************************
+ * HALF-PRECISION ROUTINES *
+ ********************************************************************
+ */
+using namespace np;
+
+float npy_half_to_float(npy_half h)
+{
+ return static_cast<float>(Half::FromBits(h));
+}
+
+double npy_half_to_double(npy_half h)
+{
+ return static_cast<double>(Half::FromBits(h));
+}
+
+npy_half npy_float_to_half(float f)
+{
+ return Half(f).Bits();
+}
+
+npy_half npy_double_to_half(double d)
+{
+ return Half(d).Bits();
+}
+
+int npy_half_iszero(npy_half h)
+{
+ return (h&0x7fff) == 0;
+}
+
+int npy_half_isnan(npy_half h)
+{
+ return Half::FromBits(h).IsNaN();
+}
+
+int npy_half_isinf(npy_half h)
+{
+ return ((h&0x7fffu) == 0x7c00u);
+}
+
+int npy_half_isfinite(npy_half h)
+{
+ return ((h&0x7c00u) != 0x7c00u);
+}
+
+int npy_half_signbit(npy_half h)
+{
+ return (h&0x8000u) != 0;
+}
+
+npy_half npy_half_spacing(npy_half h)
+{
+ npy_half ret;
+ npy_uint16 h_exp = h&0x7c00u;
+ npy_uint16 h_sig = h&0x03ffu;
+ if (h_exp == 0x7c00u) {
+#if NPY_HALF_GENERATE_INVALID
+ npy_set_floatstatus_invalid();
+#endif
+ ret = NPY_HALF_NAN;
+ } else if (h == 0x7bffu) {
+#if NPY_HALF_GENERATE_OVERFLOW
+ npy_set_floatstatus_overflow();
+#endif
+ ret = NPY_HALF_PINF;
+ } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
+ if (h_exp > 0x2c00u) { /* If result is normalized */
+ ret = h_exp - 0x2c00u;
+ } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
+ ret = 1 << ((h_exp >> 10) - 2);
+ } else {
+ ret = 0x0001u; /* Smallest subnormal half */
+ }
+ } else if (h_exp > 0x2800u) { /* If result is still normalized */
+ ret = h_exp - 0x2800u;
+ } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
+ ret = 1 << ((h_exp >> 10) - 1);
+ } else {
+ ret = 0x0001u;
+ }
+
+ return ret;
+}
+
+npy_half npy_half_copysign(npy_half x, npy_half y)
+{
+ return (x&0x7fffu) | (y&0x8000u);
+}
+
+npy_half npy_half_nextafter(npy_half x, npy_half y)
+{
+ npy_half ret;
+
+ if (npy_half_isnan(x) || npy_half_isnan(y)) {
+ ret = NPY_HALF_NAN;
+ } else if (npy_half_eq_nonan(x, y)) {
+ ret = x;
+ } else if (npy_half_iszero(x)) {
+ ret = (y&0x8000u) + 1; /* Smallest subnormal half */
+ } else if (!(x&0x8000u)) { /* x > 0 */
+ if ((npy_int16)x > (npy_int16)y) { /* x > y */
+ ret = x-1;
+ } else {
+ ret = x+1;
+ }
+ } else {
+ if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
+ ret = x-1;
+ } else {
+ ret = x+1;
+ }
+ }
+#if NPY_HALF_GENERATE_OVERFLOW
+ if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
+ npy_set_floatstatus_overflow();
+ }
+#endif
+
+ return ret;
+}
+
+int npy_half_eq_nonan(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1).Equal(Half::FromBits(h2));
+}
+
+int npy_half_eq(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) == Half::FromBits(h2);
+}
+
+int npy_half_ne(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) != Half::FromBits(h2);
+}
+
+int npy_half_lt_nonan(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1).Less(Half::FromBits(h2));
+}
+
+int npy_half_lt(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) < Half::FromBits(h2);
+}
+
+int npy_half_gt(npy_half h1, npy_half h2)
+{
+ return npy_half_lt(h2, h1);
+}
+
+int npy_half_le_nonan(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1).LessEqual(Half::FromBits(h2));
+}
+
+int npy_half_le(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) <= Half::FromBits(h2);
+}
+
+int npy_half_ge(npy_half h1, npy_half h2)
+{
+ return npy_half_le(h2, h1);
+}
+
+npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
+{
+ float fh1 = npy_half_to_float(h1);
+ float fh2 = npy_half_to_float(h2);
+ float div, mod;
+
+ div = npy_divmodf(fh1, fh2, &mod);
+ *modulus = npy_float_to_half(mod);
+ return npy_float_to_half(div);
+}
+
+
+/*
+ ********************************************************************
+ * BIT-LEVEL CONVERSIONS *
+ ********************************************************************
+ */
+
+npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
+{
+ if constexpr (Half::kNativeConversion<float>) {
+ return BitCast<uint16_t>(Half(BitCast<float>(f)));
+ }
+ else {
+ return half_private::FromFloatBits(f);
+ }
+}
+
+npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
+{
+ if constexpr (Half::kNativeConversion<double>) {
+ return BitCast<uint16_t>(Half(BitCast<double>(d)));
+ }
+ else {
+ return half_private::FromDoubleBits(d);
+ }
+}
+
+npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
+{
+ if constexpr (Half::kNativeConversion<float>) {
+ return BitCast<uint32_t>(static_cast<float>(Half::FromBits(h)));
+ }
+ else {
+ return half_private::ToFloatBits(h);
+ }
+}
+
+npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
+{
+ if constexpr (Half::kNativeConversion<double>) {
+ return BitCast<uint64_t>(static_cast<double>(Half::FromBits(h)));
+ }
+ else {
+ return half_private::ToDoubleBits(h);
+ }
+}
+
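For reference, a hedged sketch of the unchanged public npymath C API, which now routes through `np::Half` (hypothetical caller, not part of this patch):

    npy_half h = npy_float_to_half(0.1f);          /* ties-to-even rounding */
    float f = npy_half_to_float(h);
    if (npy_half_lt(h, npy_float_to_half(1.0f))) { /* false if either is NaN */
        /* ... */
    }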