[clang] [llvm] [HexFloat] add HexFloat to APFloat (PR #179771)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 13 10:42:15 PST 2026
https://github.com/Ariel-Burton updated https://github.com/llvm/llvm-project/pull/179771
>From ca410598b85ed62d1085d3b595bcc658d1ae315d Mon Sep 17 00:00:00 2001
From: Ariel Burton <ariel.burton at ibm.com>
Date: Wed, 4 Feb 2026 13:59:38 -0600
Subject: [PATCH 1/4] [HexFloat] add HexFloat to APFloat
This PR adds the HexFloat floating point representation
format to the APFloat.
See the Discourse RFC at
https://discourse.llvm.org/t/rfc-hexfloat-floating-point-support/75833
---
clang/lib/AST/MicrosoftMangle.cpp | 3 +
llvm/include/llvm/ADT/APFloat.h | 275 ++-
llvm/include/llvm/ADT/APInt.h | 8 +-
llvm/lib/Support/APFloat.cpp | 2568 +++++++++++++++++++++++++---
llvm/unittests/ADT/APFloatTest.cpp | 1500 +++++++++++++++-
5 files changed, 4064 insertions(+), 290 deletions(-)
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 551aa7bf3321c..6006a2c113fdc 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -1028,6 +1028,9 @@ void MicrosoftCXXNameMangler::mangleFloat(llvm::APFloat Number) {
case APFloat::S_Float6E3M2FN:
case APFloat::S_Float6E2M3FN:
case APFloat::S_Float4E2M1FN:
+ case APFloat::S_HexFP32:
+ case APFloat::S_HexFP64:
+ case APFloat::S_HexFP128:
llvm_unreachable("Tried to mangle unexpected APFloat semantics");
}
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 44fa3919962c4..c60b7eccce561 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -29,6 +29,8 @@
return U.IEEE.METHOD_CALL; \
if (usesLayout<DoubleAPFloat>(getSemantics())) \
return U.Double.METHOD_CALL; \
+ if (usesLayout<HexFloat>(getSemantics())) \
+ return U.Hex.METHOD_CALL; \
llvm_unreachable("Unexpected semantics"); \
} while (false)
@@ -257,7 +259,13 @@ class APFloatBase {
S_Float4E2M1FN,
// TODO: Documentation is missing.
S_x87DoubleExtended,
- S_MaxSemantics = S_x87DoubleExtended,
+
+ // HexFloat
+ S_HexFP32,
+ S_HexFP64,
+ S_HexFP128,
+
+ S_MaxSemantics = S_HexFP128,
};
LLVM_ABI static const llvm::fltSemantics &EnumToSemantics(Semantics S);
@@ -285,6 +293,9 @@ class APFloatBase {
LLVM_ABI static const fltSemantics semBogus;
LLVM_ABI static const fltSemantics semPPCDoubleDouble;
LLVM_ABI static const fltSemantics semPPCDoubleDoubleLegacy;
+ LLVM_ABI static const fltSemantics semHex_FP32;
+ LLVM_ABI static const fltSemantics semHex_FP64;
+ LLVM_ABI static const fltSemantics semHex_FP128;
friend class detail::IEEEFloat;
friend class detail::DoubleAPFloat;
@@ -317,6 +328,9 @@ class APFloatBase {
static const fltSemantics &x87DoubleExtended() {
return semX87DoubleExtended;
}
+ static const fltSemantics &HexFP32() { return semHex_FP32; }
+ static const fltSemantics &HexFP64() { return semHex_FP64; }
+ static const fltSemantics &HexFP128() { return semHex_FP128; }
/// A Pseudo fltsemantic used to construct APFloats that cannot conflict with
/// anything real.
@@ -386,12 +400,14 @@ class APFloatBase {
IEK_Inf = INT_MAX
};
+ LLVM_ABI static const char *semanticsName(const fltSemantics &);
LLVM_ABI static unsigned int semanticsPrecision(const fltSemantics &);
LLVM_ABI static ExponentType semanticsMinExponent(const fltSemantics &);
LLVM_ABI static ExponentType semanticsMaxExponent(const fltSemantics &);
LLVM_ABI static unsigned int semanticsSizeInBits(const fltSemantics &);
LLVM_ABI static unsigned int semanticsIntSizeInBits(const fltSemantics &,
bool);
+ LLVM_ABI static unsigned int semanticsRadix(const fltSemantics &);
LLVM_ABI static bool semanticsHasZero(const fltSemantics &);
LLVM_ABI static bool semanticsHasSignedRepr(const fltSemantics &);
LLVM_ABI static bool semanticsHasInf(const fltSemantics &);
@@ -673,6 +689,12 @@ class IEEEFloat final {
LLVM_ABI cmpResult compareAbsoluteValue(const IEEEFloat &) const;
+ /// returns the exponent
+ LLVM_ABI ExponentType getExponent() const;
+
+ /// returns the significand
+ LLVM_ABI APInt getSignificand() const;
+
private:
/// \name Simple Queries
/// @{
@@ -938,6 +960,171 @@ LLVM_ABI DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
roundingMode RM);
LLVM_ABI DoubleAPFloat frexp(const DoubleAPFloat &X, int &Exp, roundingMode);
+class HexFloat final {
+ friend class HexFloatArith;
+
+ /// Note: this must be the first data member.
+ /// The semantics that this value obeys.
+ const fltSemantics *semantics;
+
+ /// A binary fraction with an explicit integer bit.
+ APInt significand;
+
+ /// The signed unbiased exponent of the value.
+ ExponentType exponent;
+
+ /// The exponent in the low-order part of a HexFloat 128.
+ /// This is ignored during computation, but must be preserved so that a
+ /// bitcast to APInt and back is an identity operation.
+ ExponentType low_exponent;
+
+ /// What kind of floating point number this is.
+ ///
+ /// Only 2 bits are required, but VisualStudio incorrectly sign extends it.
+ /// Using the extra bit keeps it from failing under VisualStudio.
+ fltCategory category : 3;
+
+ /// Sign bit of the number.
+ unsigned int sign : 1;
+
+ /// The sign bit in the low-order part of a HexFloat 128.
+ /// This is ignored; see comment for low_exponent.
+ unsigned int low_sign : 1;
+
+ void initialize(const fltSemantics *);
+ void assign(const HexFloat &);
+ void copySignificand(const HexFloat &);
+ void freeSignificand();
+
+ opStatus convertToSignExtendedInteger(MutableArrayRef<integerPart> Input,
+ unsigned int Width, bool IsSigned,
+ roundingMode RM, bool *IsExact) const;
+
+ /// NOTE(review): presumably reports whether rounding under RM must bump the
+ /// magnitude of `fraction`, given the lost-fraction classification and the
+ /// number of truncated bits -- confirm against the out-of-line definition.
+ static bool roundAwayFromZero(int sign, const APInt &fraction,
+ roundingMode RM, lostFraction lost_fraction,
+ int truncatedBits);
+
+public:
+ HexFloat(const fltSemantics &); // Default construct to +0.0
+ HexFloat(const fltSemantics &, integerPart);
+ HexFloat(const fltSemantics &, uninitializedTag);
+ HexFloat(const fltSemantics &, const APInt &);
+ explicit HexFloat(double d);
+ explicit HexFloat(float f);
+ HexFloat(const HexFloat &);
+ HexFloat(HexFloat &&);
+ ~HexFloat();
+
+ HexFloat &operator=(const HexFloat &);
+ HexFloat &operator=(HexFloat &&);
+
+ fltCategory getCategory() const;
+
+ void makeZero(bool Neg);
+ void makeInf(bool Neg);
+ void makeNaN(bool SNan = false, bool Neg = false,
+ const APInt *fill = nullptr);
+ void makeLargest(bool Neg = false);
+ void makeSmallest(bool Neg = false);
+ void makeSmallestNormalized(bool Neg = false);
+
+ bool needsCleanup() const;
+
+ opStatus add(const HexFloat &RHS, roundingMode RM);
+ opStatus subtract(const HexFloat &RHS, roundingMode RM);
+ opStatus multiply(const HexFloat &RHS, roundingMode RM);
+ opStatus divide(const HexFloat &RHS, roundingMode RM);
+ opStatus remainder(const HexFloat &RHS);
+ opStatus mod(const HexFloat &RHS);
+ opStatus fusedMultiplyAdd(const HexFloat &Multiplicand,
+ const HexFloat &Addend, roundingMode RM);
+ opStatus roundToIntegral(roundingMode RM);
+ opStatus next(bool nextDown);
+
+ void changeSign();
+
+ opStatus convertToInteger(MutableArrayRef<integerPart> Input,
+ unsigned int Width, bool IsSigned, roundingMode RM,
+ bool *IsExact) const;
+ opStatus convertFromAPInt(const APInt &Input, bool isSigned, roundingMode RM);
+ opStatus convertFromSignExtendedInteger(const integerPart *Input,
+ unsigned int InputSize, bool IsSigned,
+ roundingMode RM);
+ opStatus convertFromZeroExtendedInteger(const integerPart *Input,
+ unsigned int InputSize, bool IsSigned,
+ roundingMode RM);
+ Expected<opStatus> convertFromString(StringRef, roundingMode);
+ APInt bitcastToAPInt() const;
+
+ opStatus convert(const fltSemantics &, roundingMode, bool *);
+
+ opStatus convertFrom(const IEEEFloat &ieee, roundingMode RM, bool *losesInfo);
+
+ opStatus convertTo(const fltSemantics &toSemantics, roundingMode RM,
+ APInt &encoding, bool *losesInfo) const;
+
+ unsigned int convertToHexString(char *DST, unsigned int HexDigits,
+ bool UpperCase, roundingMode RM) const;
+
+ cmpResult compare(const HexFloat &) const;
+ cmpResult compareAbsoluteValue(const HexFloat &) const;
+
+ bool bitwiseIsEqual(const HexFloat &other) const;
+
+ bool isZero() const;
+ bool isNegative() const;
+ bool isDenormal() const;
+ bool isSmallest() const;
+ bool isLargest() const;
+ bool isInteger() const;
+
+ bool isSmallestNormalized() const;
+
+ void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0,
+ unsigned FormatMaxPadding = 3, bool TruncateZero = true) const;
+
+ bool getExactInverse(APFloat *inv) const;
+
+ // If this is an exact power of two, return the exponent. If it's not an exact
+ // power of 2, return INT_MIN
+ LLVM_READONLY
+ int getExactLog2() const {
+ return isNegative() ? INT_MIN : getExactLog2Abs();
+ }
+
+ // If this is an exact power of two, return the exponent while ignoring the
+ // sign bit. If it's not an exact power of 2, return INT_MIN
+ LLVM_READONLY
+ int getExactLog2Abs() const;
+
+ int getRadix() const { return 16; }
+ ExponentType getExponent() const { return exponent; }
+ APInt getSignificand() const { return significand; }
+ bool decompose(APInt &, int &) const;
+
+ hash_code hash_value() const;
+
+ void dump() const;
+
+ friend int ilogb(const HexFloat &);
+ friend HexFloat scalbn(HexFloat X, int Exp, roundingMode);
+ friend HexFloat frexp(HexFloat X, int &Exp, roundingMode);
+
+private:
+ static unsigned int getNumPrecisionBits(const fltSemantics *semantics);
+ unsigned int getNumPrecisionBits() const {
+ return getNumPrecisionBits(semantics);
+ }
+ Expected<opStatus> convertFromDecimalString(StringRef, roundingMode);
+ Expected<opStatus> convertFromHexadecimalString(StringRef, roundingMode);
+ opStatus handleOverflow(bool isNeg = false);
+ opStatus handleUnderflow(bool isNeg = false);
+ opStatus handleDenorm(APInt &significand, int &exponent);
+};
+int ilogb(const HexFloat &Arg);
+HexFloat scalbn(HexFloat X, int Exp, roundingMode);
+HexFloat frexp(HexFloat X, int &Exp, roundingMode);
+
} // End detail namespace
// How the nonfinite values Inf and NaN are represented.
@@ -987,6 +1174,9 @@ enum class fltNanEncoding {
};
/* Represents floating point arithmetic semantics. */
struct fltSemantics {
+ /* Human readable name that identifies what these semantics represent */
+ const char *name;
+
/* The largest E such that 2^E is representable; this matches the
definition of IEEE 754. */
APFloatBase::ExponentType maxExponent;
@@ -1002,6 +1192,9 @@ struct fltSemantics {
/* Number of bits actually used in the semantics. */
unsigned int sizeInBits;
+ /* Radix this format uses: binary by default */
+ unsigned int radix = 2;
+
fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
@@ -1021,6 +1214,7 @@ struct fltSemantics {
class APFloat : public APFloatBase {
using IEEEFloat = detail::IEEEFloat;
using DoubleAPFloat = detail::DoubleAPFloat;
+ using HexFloat = detail::HexFloat;
static_assert(std::is_standard_layout<IEEEFloat>::value);
@@ -1028,12 +1222,20 @@ class APFloat : public APFloatBase {
const fltSemantics *semantics;
IEEEFloat IEEE;
DoubleAPFloat Double;
+ HexFloat Hex;
LLVM_ABI explicit Storage(IEEEFloat F, const fltSemantics &S);
explicit Storage(DoubleAPFloat F, const fltSemantics &S)
: Double(std::move(F)) {
assert(&S == &PPCDoubleDouble());
}
+ explicit Storage(HexFloat F, const fltSemantics &Semantics) {
+ if (usesLayout<HexFloat>(Semantics)) {
+ new (&Hex) HexFloat(std::move(F));
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+ }
template <typename... ArgTypes>
Storage(const fltSemantics &Semantics, ArgTypes &&... Args) {
@@ -1045,6 +1247,10 @@ class APFloat : public APFloatBase {
new (&Double) DoubleAPFloat(Semantics, std::forward<ArgTypes>(Args)...);
return;
}
+ if (usesLayout<HexFloat>(Semantics)) {
+ new (&Hex) HexFloat(Semantics, std::forward<ArgTypes>(Args)...);
+ return;
+ }
llvm_unreachable("Unexpected semantics");
}
@@ -1057,11 +1263,17 @@ class APFloat : public APFloatBase {
template <typename T> static bool usesLayout(const fltSemantics &Semantics) {
static_assert(std::is_same<T, IEEEFloat>::value ||
- std::is_same<T, DoubleAPFloat>::value);
+ std::is_same<T, DoubleAPFloat>::value ||
+ std::is_same<T, HexFloat>::value);
if (std::is_same<T, DoubleAPFloat>::value) {
return &Semantics == &PPCDoubleDouble();
}
- return &Semantics != &PPCDoubleDouble();
+ const bool is_hex = &Semantics == &HexFP32() || &Semantics == &HexFP64() ||
+ &Semantics == &HexFP128();
+ if (std::is_same_v<T, HexFloat>) {
+ return is_hex;
+ }
+ return !is_hex && &Semantics != &PPCDoubleDouble();
}
IEEEFloat &getIEEE() {
@@ -1080,6 +1292,18 @@ class APFloat : public APFloatBase {
llvm_unreachable("Unexpected semantics");
}
+ HexFloat &getHex() {
+ if (usesLayout<HexFloat>(*U.semantics))
+ return U.Hex;
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ const HexFloat &getHex() const {
+ if (usesLayout<HexFloat>(*U.semantics))
+ return U.Hex;
+ llvm_unreachable("Unexpected semantics");
+ }
+
void makeZero(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeZero(Neg)); }
void makeInf(bool Neg) { APFLOAT_DISPATCH_ON_SEMANTICS(makeInf(Neg)); }
@@ -1103,6 +1327,7 @@ class APFloat : public APFloatBase {
explicit APFloat(IEEEFloat F, const fltSemantics &S) : U(std::move(F), S) {}
explicit APFloat(DoubleAPFloat F, const fltSemantics &S)
: U(std::move(F), S) {}
+ explicit APFloat(HexFloat F, const fltSemantics &S) : U(std::move(F), S) {}
// Compares the absolute value of this APFloat with another. Both operands
// must be finite non-zero.
@@ -1248,6 +1473,8 @@ class APFloat : public APFloatBase {
return U.IEEE.add(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.add(RHS.U.Double, RM);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.add(RHS.U.Hex, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus subtract(const APFloat &RHS, roundingMode RM) {
@@ -1257,6 +1484,8 @@ class APFloat : public APFloatBase {
return U.IEEE.subtract(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.subtract(RHS.U.Double, RM);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.subtract(RHS.U.Hex, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus multiply(const APFloat &RHS, roundingMode RM) {
@@ -1266,6 +1495,8 @@ class APFloat : public APFloatBase {
return U.IEEE.multiply(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.multiply(RHS.U.Double, RM);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.multiply(RHS.U.Hex, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus divide(const APFloat &RHS, roundingMode RM) {
@@ -1275,6 +1506,8 @@ class APFloat : public APFloatBase {
return U.IEEE.divide(RHS.U.IEEE, RM);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.divide(RHS.U.Double, RM);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.divide(RHS.U.Hex, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus remainder(const APFloat &RHS) {
@@ -1284,6 +1517,8 @@ class APFloat : public APFloatBase {
return U.IEEE.remainder(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.remainder(RHS.U.Double);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.remainder(RHS.U.Hex);
llvm_unreachable("Unexpected semantics");
}
opStatus mod(const APFloat &RHS) {
@@ -1293,6 +1528,8 @@ class APFloat : public APFloatBase {
return U.IEEE.mod(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.mod(RHS.U.Double);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.mod(RHS.U.Hex);
llvm_unreachable("Unexpected semantics");
}
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend,
@@ -1306,6 +1543,8 @@ class APFloat : public APFloatBase {
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.fusedMultiplyAdd(Multiplicand.U.Double, Addend.U.Double,
RM);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.fusedMultiplyAdd(Multiplicand.U.Hex, Addend.U.Hex, RM);
llvm_unreachable("Unexpected semantics");
}
opStatus roundToIntegral(roundingMode RM) {
@@ -1471,6 +1710,8 @@ class APFloat : public APFloatBase {
return U.IEEE.compare(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.compare(RHS.U.Double);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.compare(RHS.U.Hex);
llvm_unreachable("Unexpected semantics");
}
@@ -1481,6 +1722,8 @@ class APFloat : public APFloatBase {
return U.IEEE.bitwiseIsEqual(RHS.U.IEEE);
if (usesLayout<DoubleAPFloat>(getSemantics()))
return U.Double.bitwiseIsEqual(RHS.U.Double);
+ if (usesLayout<HexFloat>(getSemantics()))
+ return U.Hex.bitwiseIsEqual(RHS.U.Hex);
llvm_unreachable("Unexpected semantics");
}
@@ -1509,14 +1752,20 @@ class APFloat : public APFloatBase {
bool isInfinity() const { return getCategory() == fcInfinity; }
bool isNaN() const { return getCategory() == fcNaN; }
- bool isNegative() const { return getIEEE().isNegative(); }
+ bool isNegative() const {
+ return isHexFloat() ? getHex().isNegative() : getIEEE().isNegative();
+ }
bool isDenormal() const { APFLOAT_DISPATCH_ON_SEMANTICS(isDenormal()); }
- bool isSignaling() const { return getIEEE().isSignaling(); }
+ bool isSignaling() const {
+ return (!isHexFloat()) && getIEEE().isSignaling();
+ }
bool isNormal() const { return !isDenormal() && isFiniteNonZero(); }
bool isFinite() const { return !isNaN() && !isInfinity(); }
- fltCategory getCategory() const { return getIEEE().getCategory(); }
+ fltCategory getCategory() const {
+ return isHexFloat() ? getHex().getCategory() : getIEEE().getCategory();
+ }
const fltSemantics &getSemantics() const { return *U.semantics; }
bool isNonZero() const { return !isZero(); }
bool isFiniteNonZero() const { return isFinite() && !isZero(); }
@@ -1527,6 +1776,7 @@ class APFloat : public APFloatBase {
bool isSmallest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isSmallest()); }
bool isLargest() const { APFLOAT_DISPATCH_ON_SEMANTICS(isLargest()); }
bool isInteger() const { APFLOAT_DISPATCH_ON_SEMANTICS(isInteger()); }
+ bool isHexFloat() const { return usesLayout<HexFloat>(getSemantics()); }
bool isSmallestNormalized() const {
APFLOAT_DISPATCH_ON_SEMANTICS(isSmallestNormalized());
@@ -1574,11 +1824,9 @@ class APFloat : public APFloatBase {
friend APFloat frexp(const APFloat &X, int &Exp, roundingMode RM);
friend IEEEFloat;
friend DoubleAPFloat;
+ friend HexFloat;
};
-static_assert(sizeof(APFloat) == sizeof(detail::IEEEFloat),
- "Empty base class optimization is not performed.");
-
/// See friend declarations above.
///
/// These additional declarations are required in order to compile LLVM with IBM
@@ -1595,10 +1843,15 @@ LLVM_ABI hash_code hash_value(const APFloat &Arg);
/// Inf -> \c IEK_Inf
///
inline int ilogb(const APFloat &Arg) {
+ static_assert(sizeof(APFloat) == sizeof(APFloat::Storage),
+ "Empty base class optimization is not performed.");
+
if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
return ilogb(Arg.U.IEEE);
if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
return ilogb(Arg.U.Double);
+ if (Arg.isHexFloat())
+ return ilogb(Arg.getHex());
llvm_unreachable("Unexpected semantics");
}
@@ -1608,6 +1861,8 @@ inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) {
return APFloat(scalbn(X.U.IEEE, Exp, RM), X.getSemantics());
if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics()))
return APFloat(scalbn(X.U.Double, Exp, RM), X.getSemantics());
+ if (APFloat::usesLayout<detail::HexFloat>(X.getSemantics()))
+ return APFloat(scalbn(X.U.Hex, Exp, RM), X.getSemantics());
llvm_unreachable("Unexpected semantics");
}
@@ -1620,6 +1875,8 @@ inline APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM) {
return APFloat(frexp(X.U.IEEE, Exp, RM), X.getSemantics());
if (APFloat::usesLayout<detail::DoubleAPFloat>(X.getSemantics()))
return APFloat(frexp(X.U.Double, Exp, RM), X.getSemantics());
+ if (APFloat::usesLayout<detail::HexFloat>(X.getSemantics()))
+ return APFloat(frexp(X.U.Hex, Exp, RM), X.getSemantics());
llvm_unreachable("Unexpected semantics");
}
/// Returns the absolute value of the argument.
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 9193b5f8994e0..b1d9e43961447 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1923,6 +1923,10 @@ class [[nodiscard]] APInt {
return tcAddPart(dst, 1, parts);
}
+ /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
+ /// to, or greater than RHS.
+ LLVM_ABI int compare(const APInt &RHS) const LLVM_READONLY;
+
/// Decrement a bignum in-place. Return the borrow flag.
static WordType tcDecrement(WordType *dst, unsigned parts) {
return tcSubtractPart(dst, 1, parts);
@@ -2106,10 +2110,6 @@ class [[nodiscard]] APInt {
/// out-of-line slow case for operator^=.
LLVM_ABI void xorAssignSlowCase(const APInt &RHS);
- /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
- /// to, or greater than RHS.
- LLVM_ABI int compare(const APInt &RHS) const LLVM_READONLY;
-
/// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
/// to, or greater than RHS.
LLVM_ABI int compareSigned(const APInt &RHS) const LLVM_READONLY;
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index b359c680ab673..caa576a26af04 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -34,6 +34,8 @@
return U.IEEE.METHOD_CALL; \
if (usesLayout<DoubleAPFloat>(getSemantics())) \
return U.Double.METHOD_CALL; \
+ if (usesLayout<HexFloat>(getSemantics())) \
+ return U.Hex.METHOD_CALL; \
llvm_unreachable("Unexpected semantics"); \
} while (false)
@@ -53,28 +55,63 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi
namespace llvm {
-constexpr fltSemantics APFloatBase::semIEEEhalf = {15, -14, 11, 16};
-constexpr fltSemantics APFloatBase::semBFloat = {127, -126, 8, 16};
-constexpr fltSemantics APFloatBase::semIEEEsingle = {127, -126, 24, 32};
-constexpr fltSemantics APFloatBase::semIEEEdouble = {1023, -1022, 53, 64};
-constexpr fltSemantics APFloatBase::semIEEEquad = {16383, -16382, 113, 128};
-constexpr fltSemantics APFloatBase::semFloat8E5M2 = {15, -14, 3, 8};
+constexpr fltSemantics APFloatBase::semIEEEhalf = {"IEEEhalf", 15, -14, 11, 16};
+constexpr fltSemantics APFloatBase::semBFloat = {"BFloat", 127, -126, 8, 16};
+// The leading string is the name reported by APFloatBase::semanticsName();
+// it is spelled to match the semIEEEsingle identifier and its siblings.
+constexpr fltSemantics APFloatBase::semIEEEsingle = {"IEEEsingle", 127, -126,
+ 24, 32};
+// The leading string is the name reported by APFloatBase::semanticsName().
+constexpr fltSemantics APFloatBase::semIEEEdouble = {"IEEEdouble", 1023, -1022,
+ 53, 64};
+constexpr fltSemantics APFloatBase::semIEEEquad = {"IEEEquad", 16383, -16382,
+ 113, 128};
+constexpr fltSemantics APFloatBase::semFloat8E5M2 = {"Float8E5M2", 15, -14, 3,
+ 8};
constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
- 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-constexpr fltSemantics APFloatBase::semFloat8E4M3 = {7, -6, 4, 8};
+ "Float8E5M2FNUZ",
+ 15,
+ -15,
+ 3,
+ 8,
+ 2,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::NegativeZero};
+constexpr fltSemantics APFloatBase::semFloat8E4M3 = {"Float8E4M3", 7, -6, 4, 8};
constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
- 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
+ "Float8E4M3FN",
+ 8,
+ -6,
+ 4,
+ 8,
+ 2,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::AllOnes};
constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
- 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+ "Float8E4M3FNUZ",
+ 7,
+ -7,
+ 4,
+ 8,
+ 2,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::NegativeZero};
constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
- 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
-constexpr fltSemantics APFloatBase::semFloat8E3M4 = {3, -2, 5, 8};
-constexpr fltSemantics APFloatBase::semFloatTF32 = {127, -126, 11, 19};
+ "Float8E4M3B11FNUZ",
+ 4,
+ -10,
+ 4,
+ 8,
+ 2,
+ fltNonfiniteBehavior::NanOnly,
+ fltNanEncoding::NegativeZero};
+constexpr fltSemantics APFloatBase::semFloat8E3M4 = {"Float8E3M4", 3, -2, 5, 8};
+constexpr fltSemantics APFloatBase::semFloatTF32 = {"FloatTF32", 127, -126, 11,
+ 19};
constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
+ "Float8E8M0FNU",
127,
-127,
1,
8,
+ 2,
fltNonfiniteBehavior::NanOnly,
fltNanEncoding::AllOnes,
false,
@@ -82,17 +119,41 @@ constexpr fltSemantics APFloatBase::semFloat8E8M0FNU = {
false};
constexpr fltSemantics APFloatBase::semFloat6E3M2FN = {
- 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
+ "Float6E3M2FN", 4, -2, 3, 6, 2, fltNonfiniteBehavior::FiniteOnly};
constexpr fltSemantics APFloatBase::semFloat6E2M3FN = {
- 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
+ "Float6E2M3FN", 2, 0, 4, 6, 2, fltNonfiniteBehavior::FiniteOnly};
constexpr fltSemantics APFloatBase::semFloat4E2M1FN = {
- 2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
-constexpr fltSemantics APFloatBase::semX87DoubleExtended = {16383, -16382, 64,
- 80};
-constexpr fltSemantics APFloatBase::semBogus = {0, 0, 0, 0};
-constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {-1, 0, 0, 128};
+ "Float4E2M1FN", 2, 0, 2, 4, 2, fltNonfiniteBehavior::FiniteOnly};
+constexpr fltSemantics APFloatBase::semX87DoubleExtended = {
+ "X87DoubleExtended", 16383, -16382, 64, 80};
+constexpr fltSemantics APFloatBase::semBogus = {"Bogus", 0, 0, 0, 0};
+constexpr fltSemantics APFloatBase::semPPCDoubleDouble = {"PPCDoubleDouble", -1,
+ 0, 0, 128};
constexpr fltSemantics APFloatBase::semPPCDoubleDoubleLegacy = {
- 1023, -1022 + 53, 53 + 53, 128};
+ "PPCDoubleDoubleLegacy", 1023, -1022 + 53, 53 + 53, 128};
+
+/* Hex float
+ All three sizes share the same sign and exponent fields
+ 1 bit for sign
+ 7 bits for unsigned CHARACTERISTIC, which is the exponent + a bias of 64.
+ Therefore the range of exponent is -64 .. +63.
+ The exponent is for base 16.
+ (Therefore, in base-2 terms the smallest scale is 16^-64 == 2^-256,
+ and the largest scale is 16^63 == 2^252.)
+ For Hex_FP32, the next 24 bits are the SIGNIFICAND.
+ For Hex_FP64, the next 56 bits are the SIGNIFICAND.
+ For Hex_FP128, two 64-bit values are used. The first is laid out as Hex_FP64.
+ In the second, the lower 56 bits form the low-order part
+ of the SIGNIFICAND.
+ Precision is given in terms of the radix, e.g., Hex_FP32 has
+ 6 hexits of precision.
+*/
+constexpr fltSemantics APFloatBase::semHex_FP32 = {
+ "Hex_FP32", 63, -64, 6, 32, 16, fltNonfiniteBehavior::FiniteOnly};
+constexpr fltSemantics APFloatBase::semHex_FP64 = {
+ "Hex_FP64", 63, -64, 14, 64, 16, fltNonfiniteBehavior::FiniteOnly};
+constexpr fltSemantics APFloatBase::semHex_FP128 = {
+ "Hex_FP128", 63, -64, 28, 128, 16, fltNonfiniteBehavior::FiniteOnly};
const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
switch (S) {
@@ -136,6 +197,12 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
return Float4E2M1FN();
case S_x87DoubleExtended:
return x87DoubleExtended();
+ case S_HexFP32:
+ return HexFP32();
+ case S_HexFP64:
+ return HexFP64();
+ case S_HexFP128:
+ return HexFP128();
}
llvm_unreachable("Unrecognised floating semantics");
}
@@ -182,6 +249,12 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
return S_Float4E2M1FN;
else if (&Sem == &llvm::APFloat::x87DoubleExtended())
return S_x87DoubleExtended;
+ else if (&Sem == &llvm::APFloat::HexFP32())
+ return S_HexFP32;
+ else if (&Sem == &llvm::APFloat::HexFP64())
+ return S_HexFP64;
+ else if (&Sem == &llvm::APFloat::HexFP128())
+ return S_HexFP128;
else
llvm_unreachable("Unknown floating semantics");
}
@@ -210,6 +283,9 @@ const unsigned int maxPowerOfFiveParts =
2 +
((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
+const char *APFloatBase::semanticsName(const fltSemantics &semantics) {
+ return semantics.name;
+}
unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
return semantics.precision;
}
@@ -243,6 +319,10 @@ bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) {
return semantics.hasSignedRepr;
}
+unsigned int APFloatBase::semanticsRadix(const fltSemantics &semantics) {
+ return semantics.radix;
+}
+
bool APFloatBase::semanticsHasInf(const fltSemantics &semantics) {
return semantics.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754;
}
@@ -2914,7 +2994,7 @@ IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
unsigned sigPartCount, int exp,
roundingMode rounding_mode) {
unsigned int parts, pow5PartCount;
- fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
+ fltSemantics calcSemantics = {"calc", 32767, -32767, 0, 0};
integerPart pow5Parts[maxPowerOfFiveParts];
bool isNearest;
@@ -5865,248 +5945,2284 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
std::move(Second));
}
-} // namespace detail
-
-APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
- if (usesLayout<IEEEFloat>(Semantics)) {
- new (&IEEE) IEEEFloat(std::move(F));
- return;
- }
- if (usesLayout<DoubleAPFloat>(Semantics)) {
- const fltSemantics& S = F.getSemantics();
- new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
- APFloat(APFloatBase::IEEEdouble()));
- return;
- }
- llvm_unreachable("Unexpected semantics");
-}
+// class HexFloatArith implements HFP arithmetic using the conventions
+// and approaches of the arith library, and matches the behaviour
+// of the hardware.
+class HexFloatArith {
+public:
+ struct value_t {
+ int sign; // -1 for negative, +1 for positive, 0 when the fraction is zero
+ int exponent; // exponent copied from the HexFloat; one step == one hexit (4 bits)
+ APInt fraction; // significand as widened by fetch(): spare hexit on each side
+ };
+ static void fetch(const HexFloat &in, value_t &out); // unpack a HexFloat into working form
+ static void align(value_t &, value_t &, bool sticky = false); // give both operands the larger exponent
+ static void add(value_t &, const value_t &); // left += right (operands already aligned)
+ static void sub(value_t &, const value_t &); // left -= right (operands already aligned)
+ static void mult(value_t &, const value_t &); // left *= right
+ static void divide(value_t &, const value_t &); // left /= right
+ static void norm(value_t &); // hexit-normalize the fraction, adjusting the exponent
+ static int putres(const value_t &, HexFloat &); // pack working form back into a HexFloat
+};
-Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
- roundingMode RM) {
- APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
+unsigned int HexFloat::getNumPrecisionBits(const fltSemantics *semantics) {
+  // The precision field is scaled by four here, i.e. it is treated as a count
+  // of 4-bit hexits and converted to a count of bits.
+  assert(APFloat::usesLayout<HexFloat>(*semantics) && "not a HexFloat");
+  const unsigned int bitsPerHexit = 4;
+  return bitsPerHexit * semantics->precision;
}
-hash_code hash_value(const APFloat &Arg) {
- if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
- return hash_value(Arg.U.IEEE);
- if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
- return hash_value(Arg.U.Double);
- llvm_unreachable("Unexpected semantics");
+void HexFloat::initialize(const fltSemantics *ourSemantics) {
+  // Adopt the semantics, size the significand to match them, and then reset
+  // the value to +0.
+  semantics = ourSemantics;
+  const unsigned int widthInBits = getNumPrecisionBits(ourSemantics);
+  significand = APInt(widthInBits, 0);
+  makeZero(/* IsNegative */ false);
}
-APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
- : APFloat(Semantics) {
- auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
- assert(StatusOrErr && "Invalid floating point representation");
- consumeError(StatusOrErr.takeError());
+void HexFloat::assign(const HexFloat &rhs) {
+  // Member-wise copy of the value fields. The caller is responsible for
+  // making the semantics match beforehand.
+  assert(semantics == rhs.semantics);
+  significand = rhs.significand;
+  exponent = rhs.exponent;
+  sign = rhs.sign;
+  low_exponent = rhs.low_exponent;
+  low_sign = rhs.low_sign;
}
-FPClassTest APFloat::classify() const {
- if (isZero())
- return isNegative() ? fcNegZero : fcPosZero;
- if (isNormal())
- return isNegative() ? fcNegNormal : fcPosNormal;
- if (isDenormal())
- return isNegative() ? fcNegSubnormal : fcPosSubnormal;
- if (isInfinity())
- return isNegative() ? fcNegInf : fcPosInf;
- assert(isNaN() && "Other class of FP constant");
- return isSignaling() ? fcSNan : fcQNan;
+HexFloat::HexFloat(const fltSemantics &ourSemantics)
+    : semantics(&ourSemantics) {
+  // initialize() sizes the significand for these semantics and zeroes the
+  // value.
+  const fltSemantics *const sem = &ourSemantics;
+  initialize(sem);
}
-bool APFloat::getExactInverse(APFloat *Inv) const {
- // Only finite, non-zero numbers can have a useful, representable inverse.
- // This check filters out +/- zero, +/- infinity, and NaN.
- if (!isFiniteNonZero())
- return false;
-
- // Historically, this function rejects subnormal inputs. One reason why this
- // might be important is that subnormals may behave differently under FTZ/DAZ
- // runtime behavior.
- if (isDenormal())
- return false;
-
- // A number has an exact, representable inverse if and only if it is a power
- // of two.
- //
- // Mathematical Rationale:
- // 1. A binary floating-point number x is a dyadic rational, meaning it can
- // be written as x = M / 2^k for integers M (the significand) and k.
- // 2. The inverse is 1/x = 2^k / M.
- // 3. For 1/x to also be a dyadic rational (and thus exactly representable
- // in binary), its denominator M must also be a power of two.
- // Let's say M = 2^m.
- // 4. Substituting this back into the formula for x, we get
- // x = (2^m) / (2^k) = 2^(m-k).
- //
- // This proves that x must be a power of two.
-
- // getExactLog2Abs() returns the integer exponent if the number is a power of
- // two or INT_MIN if it is not.
- const int Exp = getExactLog2Abs();
- if (Exp == INT_MIN)
- return false;
-
- // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
- // scaling 1.0 by the negated exponent.
- APFloat Reciprocal =
- scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
- rmTowardZero);
+HexFloat::HexFloat(const fltSemantics &ourSemantics, integerPart intValue)
+ : semantics(&ourSemantics) {
+ initialize(&ourSemantics);
+ static_assert(sizeof(intValue) <= sizeof(uint64_t));
+ if (!intValue) {
+ // initialize() has already set the value to zero
+ return;
+ }
- // scalbn might round if the resulting exponent -Exp is outside the
- // representable range, causing overflow (to infinity) or underflow. We
- // must verify that the result is still the exact power of two we expect.
- if (Reciprocal.getExactLog2Abs() != -Exp)
- return false;
+ // intValue is regarded as an integer.
+ // Therefore its radix point is to the right of intValue.
+ // HexFloat has the radix point to the left of the significand,
+ // therefore we initialize the exponent to the number of hexits
+ // intValue could contain. We then normalize, decrementing the exponent
+ // each time we shift.
- // Avoid multiplication with a subnormal, it is not safe on all platforms and
- // may be slower than a normal division.
- if (Reciprocal.isDenormal())
- return false;
+ APInt working_significand(sizeof(intValue) * 8, intValue);
+ APInt mask(working_significand.getBitWidth(), 0);
+ mask.setBit(mask.getBitWidth() - 4); // lowest bit of top hexit
+ // the corresponding constructor for IEEE seems to assume that the
+ // value cannot be negative.
+ sign = 0;
+ exponent = (sizeof(intValue) * 8) / 4;
+ // normalize: shift left one hexit at a time until the top hexit is non-zero
+ while (mask.ugt(working_significand)) {
+ working_significand <<= 4;
+ exponent--;
+ }
+ int delta_width =
+ working_significand.getBitWidth() - significand.getBitWidth();
+ if (delta_width > 0) {
+ // HexFloat always rounds towards 0, so truncation is adequate.
+ // APInt::trunc drops bits on the left, so shift the wanted bits down first.
+ working_significand = working_significand.lshr(delta_width);
+ working_significand = working_significand.trunc(significand.getBitWidth());
+ } else if (delta_width < 0) {
+ // APInt::zext extends on the left, so shift the value back up afterwards.
+ working_significand = working_significand.zext(significand.getBitWidth());
+ working_significand <<= -delta_width;
+ }
+ significand = working_significand;
+}
- assert(Reciprocal.isFiniteNonZero());
+HexFloat::HexFloat(const fltSemantics &ourSemantics, uninitializedTag)
+ : semantics(&ourSemantics) {
+ initialize(&ourSemantics); // despite the tag, initialize() leaves the value at +0
+}
- if (Inv)
- *Inv = std::move(Reciprocal);
+HexFloat::HexFloat(const fltSemantics &ourSemantics,
+ const APInt &EncodedHexFloat)
+ : semantics(&ourSemantics) {
+ initialize(&ourSemantics);
+ assert(semantics == &ourSemantics && "semantics not initialized as expected");
- return true;
-}
+ assert(EncodedHexFloat.getBitWidth() == semantics->sizeInBits &&
+ "EncodedHexFloat has unexpected size");
-APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
- roundingMode RM, bool *losesInfo) {
- if (&getSemantics() == &ToSemantics) {
- *losesInfo = false;
- return opOK;
- }
- if (usesLayout<IEEEFloat>(getSemantics()) &&
- usesLayout<IEEEFloat>(ToSemantics))
- return U.IEEE.convert(ToSemantics, RM, losesInfo);
- if (usesLayout<IEEEFloat>(getSemantics()) &&
- usesLayout<DoubleAPFloat>(ToSemantics)) {
- assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
- auto Ret =
- U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
- *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
- return Ret;
- }
- if (usesLayout<DoubleAPFloat>(getSemantics()) &&
- usesLayout<IEEEFloat>(ToSemantics)) {
- auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
- *this = APFloat(std::move(getIEEE()), ToSemantics);
- return Ret;
+ auto get_sign_exponent = [](const APInt &i, int &s, int &e) {
+ auto left_byte = i.getHiBits(8).trunc(8).getLimitedValue(); // most significant byte of i
+ s = (left_byte & 0x80) ? 1 : 0; // top bit of that byte is the sign
+ e = (left_byte & 0x7f) - 64; // remaining 7 bits, with 64 subtracted
+ };
+ int s, e;
+ get_sign_exponent(EncodedHexFloat, s, e);
+ sign = s;
+ exponent = e;
+ if (semantics == &APFloatBase::HexFP128()) {
+ // We need to remove the sign/exponent byte from the low-order 64 bit
+ // value, and save it.
+ // We then need to form the significand from the low 56 bits of each part.
+ APInt low(EncodedHexFloat.trunc(64));
+ APInt high(EncodedHexFloat.lshr(64).trunc(64));
+ APInt low_significand(low.trunc(56).zext(112));
+ APInt high_significand(high.trunc(56).zext(112));
+ high_significand <<= 56;
+ significand = high_significand | low_significand;
+ get_sign_exponent(low, s, e);
+ low_sign = s;
+ low_exponent = e;
+ } else {
+ auto NumPrecisionBits = getNumPrecisionBits(semantics);
+ significand =
+ EncodedHexFloat.getLoBits(NumPrecisionBits).trunc(NumPrecisionBits);
}
- llvm_unreachable("Unexpected semantics");
+ assert(significand.getBitWidth() == getNumPrecisionBits(semantics));
}
-APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
- return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
+HexFloat::HexFloat(double d) {
+  // Conversion from a host double is not supported: the object is put into a
+  // defined HexFP64 state and the call is then reported as unreachable.
+  initialize(&APFloatBase::HexFP64());
+  llvm_unreachable("HexFloat constructor double: cannot create from double\n");
}
-void APFloat::print(raw_ostream &OS) const {
- SmallVector<char, 16> Buffer;
- toString(Buffer);
- OS << Buffer;
+HexFloat::HexFloat(float f) {
+  // Conversion from a host float is not supported: the object is put into a
+  // defined HexFP32 state and the call is then reported as unreachable.
+  initialize(&APFloatBase::HexFP32());
+  llvm_unreachable("HexFloat constructor float: cannot create from float\n");
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void APFloat::dump() const {
- print(dbgs());
- dbgs() << '\n';
+HexFloat::HexFloat(const HexFloat &rhs) {
+ initialize(rhs.semantics); // adopt rhs's semantics and size the significand
+ assign(rhs); // then copy the value fields
}
-#endif
-void APFloat::Profile(FoldingSetNodeID &NID) const {
- NID.Add(bitcastToAPInt());
-}
+HexFloat::HexFloat(HexFloat &&rhs) {
+  // Do not route through operator=: it compares this->semantics with
+  // rhs.semantics, and this constructor has not set this->semantics yet.
+  // Set the object up from rhs explicitly, exactly as the copy constructor
+  // does.
+  initialize(rhs.semantics);
+  assign(rhs);
+}
-APFloat::opStatus APFloat::convertToInteger(APSInt &result,
- roundingMode rounding_mode,
- bool *isExact) const {
- unsigned bitWidth = result.getBitWidth();
- SmallVector<uint64_t, 4> parts(result.getNumWords());
- opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
- rounding_mode, isExact);
- // Keeps the original signed-ness.
- result = APInt(bitWidth, parts);
- return status;
-}
+// Nothing to release explicitly; the members clean up after themselves.
+HexFloat::~HexFloat() = default;
-double APFloat::convertToDouble() const {
- if (&getSemantics() ==
- (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble)
- return getIEEE().convertToDouble();
- assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
- "Float semantics is not representable by IEEEdouble");
- APFloat Temp = *this;
- bool LosesInfo;
- opStatus St =
- Temp.convert(APFloatBase::semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
- assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
- (void)St;
- return Temp.getIEEE().convertToDouble();
+HexFloat &HexFloat::operator=(const HexFloat &rhs) {
+  // Self-assignment changes nothing.
+  if (this == &rhs)
+    return *this;
+
+  // assign() requires matching semantics, so adopt rhs's semantics first
+  // when they differ.
+  const bool sameSemantics = (semantics == rhs.semantics);
+  if (!sameSemantics)
+    initialize(rhs.semantics);
+  assign(rhs);
+  return *this;
}
-#ifdef HAS_IEE754_FLOAT128
-float128 APFloat::convertToQuad() const {
- if (&getSemantics() == (const llvm::fltSemantics *)&APFloatBase::semIEEEquad)
- return getIEEE().convertToQuad();
- assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
- "Float semantics is not representable by IEEEquad");
- APFloat Temp = *this;
- bool LosesInfo;
- opStatus St =
- Temp.convert(APFloatBase::semIEEEquad, rmNearestTiesToEven, &LosesInfo);
- assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
- (void)St;
- return Temp.getIEEE().convertToQuad();
+HexFloat &HexFloat::operator=(HexFloat &&rhs) {
+  // Implemented as a copy: rhs is left unchanged.
+  if (this == &rhs)
+    return *this;
+
+  // Re-initialize for rhs's semantics when they differ from ours, then copy
+  // the value fields.
+  const bool needsReinit = (semantics != rhs.semantics);
+  if (needsReinit)
+    initialize(rhs.semantics);
+  assign(rhs);
+  return *this;
}
-#endif
-float APFloat::convertToFloat() const {
- if (&getSemantics() ==
- (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle)
- return getIEEE().convertToFloat();
- assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
- "Float semantics is not representable by IEEEsingle");
- APFloat Temp = *this;
- bool LosesInfo;
- opStatus St =
- Temp.convert(APFloatBase::semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
- assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
- (void)St;
- return Temp.getIEEE().convertToFloat();
+void HexFloatArith::fetch(const HexFloat &hf, value_t &v) {
+  // Build the working fraction: eight bits wider than the stored significand,
+  // with the significand moved up one hexit. That leaves a guard hexit on the
+  // right and a hexit for a possible carry on the left.
+  const unsigned storedWidth = hf.significand.getBitWidth();
+  APInt widened = hf.significand.zext(storedWidth + 8); // zext adds on left
+  widened <<= 4;
+
+  v.exponent = hf.exponent;
+  v.fraction = widened;
+
+  // A zero fraction always carries sign 0; otherwise map the stored sign
+  // flag to -1 / +1.
+  if (v.fraction.isZero())
+    v.sign = 0;
+  else
+    v.sign = hf.sign ? -1 : 1;
}
-bool APFloatBase::isValidArbitraryFPFormat(StringRef Format) {
- static constexpr StringLiteral ValidFormats[] = {
- "Float8E5M2", "Float8E5M2FNUZ", "Float8E4M3", "Float8E4M3FN",
- "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
- "Float6E3M2FN", "Float6E2M3FN", "Float4E2M1FN"};
- return llvm::is_contained(ValidFormats, Format);
-}
+void HexFloatArith::align(value_t &left, value_t &right,
+ bool sticky /* = false */) {
+ // Align by raising the lower exponent to match the higher one, shifting
+ // that operand's fraction right by one hexit per exponent step.
+ // sticky indicates that the least significant bit should be set
+ // if any 1s are shifted out on the right.
+ assert((left.fraction.getBitWidth() == right.fraction.getBitWidth()) &&
+ "left and right significands have different sizes");
-APFloat::Storage::~Storage() {
- if (usesLayout<IEEEFloat>(*semantics)) {
- IEEE.~IEEEFloat();
+ int nhexits = left.fraction.getBitWidth() / 4; // 4 bits per hexit
+ int delta_exp = left.exponent - right.exponent;
+ value_t *value_to_align, *other;
+
+ if (delta_exp == 0)
return;
+
+ if (delta_exp > 0) {
+ // the value to align is the right
+ value_to_align = &right;
+ other = &left;
+ } else {
+ value_to_align = &left;
+ other = &right;
+ delta_exp = -delta_exp;
}
- if (usesLayout<DoubleAPFloat>(*semantics)) {
- Double.~DoubleAPFloat();
+ value_to_align->exponent = other->exponent;
+
+ if (delta_exp >= (nhexits - 1)) {
+ // Every significant hexit is shifted out. Honour the sticky request
+ // here too: a non-zero fraction has lost 1s, so the least significant
+ // bit must still be set.
+ bool lostBits = sticky && !value_to_align->fraction.isZero();
+ value_to_align->fraction.clearAllBits();
+ if (lostBits)
+ value_to_align->fraction.setBit(0);
return;
}
- llvm_unreachable("Unexpected semantics");
+
+ int placesToShift = delta_exp * 4;
+
+ if (sticky) {
+ // 1s are lost exactly when the lowest set bit lies inside the region
+ // that is about to be shifted out.
+ int numTrailingZeros = value_to_align->fraction.countTrailingZeros();
+ sticky = (numTrailingZeros < placesToShift);
+ }
+
+ value_to_align->fraction = value_to_align->fraction.lshr(placesToShift);
+
+ if (sticky)
+ value_to_align->fraction.setBit(0);
}
-APFloat::Storage::Storage(const APFloat::Storage &RHS) {
- if (usesLayout<IEEEFloat>(*RHS.semantics)) {
- new (this) IEEEFloat(RHS.IEEE);
+void HexFloatArith::norm(value_t &v) {
+ if (v.fraction.isZero()) {
+ v.sign = 0;
+ v.exponent = 0;
return;
}
- if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
- new (this) DoubleAPFloat(RHS.Double);
+ APInt t(v.fraction.getBitWidth(), 0); // t holds the threshold
+ // that v.fraction is
+ // compared with
+
+ // check for carry
+ t.setBit(t.getBitWidth() - 4); // the low bit of the top hexit,
+ // i.e., where the carry is
+ if (v.fraction.uge(t)) {
+ v.fraction = v.fraction.lshr(4);
+ v.exponent++;
return;
}
- llvm_unreachable("Unexpected semantics");
+
+ // general case --- shift left one hexit at a time until the hexit just
+ // below the carry hexit is non-zero
+ t.clearAllBits();
+ t.setBit(t.getBitWidth() - 8); // lowest bit of the hexit below the carry hexit
+ while (v.fraction.ult(t)) {
+ v.fraction <<= 4;
+ v.exponent--;
+ }
+}
+
+/// Converts the working representation \p v back into \p result; this is the
+/// inverse of fetch().
+///
+/// Returns the sign of the stored value (-1 or +1), or 0 when the result is
+/// zero. For the division-by-zero marker (see HexFloatArith::divide),
+/// \p result is left untouched and v.sign is returned.
+int HexFloatArith::putres(const value_t &v, HexFloat &result) {
+  if (v.exponent < -64) {
+    // underflow --- set to 0.0
+    result.makeZero(v.sign < 0);
+    return 0;
+  }
+  if (v.exponent > 1024) {
+    // This indicates division by zero (see HexFloatArith::divide, which adds
+    // 2048 to the exponent). In this case the arith putres takes the first
+    // input operand as the result. The first operand is result (see the
+    // control flow in HexFloat::divide), so there is nothing to do: the
+    // object HexFloat::divide was called on has not been modified.
+    return v.sign;
+  }
+
+  // Shift right to eliminate the guard hexit on the right, then drop the
+  // eight extra bits that fetch() added.
+  APInt fraction = v.fraction.lshr(4);
+  fraction = fraction.trunc(fraction.getBitWidth() - 8);
+  assert((fraction.getBitWidth() == result.significand.getBitWidth()) &&
+         "fraction has unexpected width");
+  result.significand = fraction;
+  if (result.significand.isZero()) {
+    result.makeZero(v.sign < 0);
+    return 0;
+  }
+
+  assert(v.sign != 0 && "v.sign is unexpectedly zero");
+  result.sign = v.sign < 0 ? 1 : 0;
+  if (v.exponent > 63) {
+    // Silently wrap the exponent round. The exponent is held as a 7-bit
+    // field biased by 64 (see the APInt constructor), so the wrap is modulo
+    // 128 on the biased value: 64 becomes -64, 127 becomes -1, and so on.
+    result.exponent = ((v.exponent + 64) & 0x7f) - 64;
+  } else {
+    // underflow case, i.e., exponent < -64, was dealt with above
+    result.exponent = v.exponent;
+  }
+  if (result.semantics == &APFloatBase::HexFP128()) {
+    result.low_exponent = (result.exponent - 14) & 0x7f;
+    result.low_sign = result.sign;
+  }
+  return v.sign;
+}
+
+void HexFloatArith::add(value_t &left, const value_t &right) { // left += right; operands must already be aligned
+ assert(left.fraction.countLeadingZeros() >= 4 &&
+ "expect top hexit of left to be zero");
+ assert(right.fraction.countLeadingZeros() >= 4 &&
+ "expect top hexit of right to be zero");
+ assert((((1 == left.sign) && (-1 != right.sign)) ||
+ ((-1 == left.sign) && (1 != right.sign)) || (0 == left.sign)) &&
+ "add expects values to have same sign or be zero");
+ assert(left.exponent == right.exponent &&
+ "add expects values to have the same exponent");
+ left.sign |= right.sign; // never +1 with -1 (asserted above); | lets a zero sign take the other's
+ left.fraction += right.fraction; // any carry lands in the top hexit asserted zero above
+}
+
+void HexFloatArith::sub(value_t &left, const value_t &right) { // left -= right by magnitude; operands must already be aligned
+ assert(left.exponent == right.exponent &&
+ "sub expects values to have the same exponent");
+ assert(left.fraction.countl_zero() >= 4 &&
+ "top (carry/guard) hexit of left unexpectedly not zero");
+ assert(right.fraction.countl_zero() >= 4 &&
+ "top (carry/guard) hexit of right unexpectedly not zero");
+ left.fraction -= right.fraction;
+ APInt t(left.fraction.getBitWidth(), 0);
+ t.setHighBits(1); // only the most significant bit set
+ if (left.fraction.uge(t)) { // top bit set: the subtraction borrowed, i.e. right was larger
+ left.fraction.negate(); // recover the magnitude of the difference
+ // The following assignment is safe, in that we know that right.sign cannot
+ // be zero. We know this for the following reason.
+ // The top hexit of the fraction is for carry/borrow.
+ // On entry this hexit must have been zero (we asserted that above).
+ // The only way the hexit could become set is if
+ // right.fraction > left.fraction.
+ // That means that right.fraction must be > 0,
+ // which means that right.sign cannot be 0.
+ assert(right.sign != 0 && "right.sign unexpectedly zero");
+ left.sign = -right.sign;
+ }
+}
+
+void HexFloatArith::mult(value_t &left, const value_t &right) {
+ // This needs a little care. The intermediate product needs to
+ // be twice as large as the arguments. However, recall that
+ // in the representation here there is a guard hexit on the right,
+ // and a hexit on the left for a possible carry.
+ // APInt requires that operands and results have the same size.
+ // We therefore need to extend the size of the operands before
+ // working on them.
+ // Result is placed in left.
+ int extend_to = 2 * (left.fraction.getBitWidth() - 4) + 4; // width used for the product
+ int extend_by = extend_to - left.fraction.getBitWidth(); // number of bits added to each operand
+ APInt L = left.fraction.zext(extend_to);
+ APInt R = right.fraction.zext(extend_to);
+ APInt prod(L * R);
+
+ // The sign assignment below is safe.
+ // If either left.sign or right.sign is zero, the result sign is zero.
+ // That's ok, because the fraction will also be zero.
+ assert((left.sign || left.fraction.isZero()) &&
+ "left.sign is zero, but fraction is not");
+ assert((right.sign || right.fraction.isZero()) &&
+ "right.sign is zero, but fraction is not");
+ left.sign = left.sign * right.sign;
+ left.exponent = left.exponent + right.exponent;
+ left.fraction = prod.lshr(extend_by).trunc(left.fraction.getBitWidth()); // drop the low extend_by bits, then return to operand width
+}
+
+/// Divide numerator by divisor, leaving the result in numerator.
+/// numerator and divisor are assumed to be normalized.
+void HexFloatArith::divide(value_t &numerator, const value_t &divisor_in) {
+ value_t divisor = divisor_in; // local copy: the divisor is shifted below
+ APInt t(divisor.fraction.getBitWidth(), 0);
+
+ // check if dividing by 0
+ if (0 == divisor.sign) {
+ /* divisor is zero */
+ numerator.exponent += 2048; // marker that putres() recognises (exponent > 1024)
+ // want to set all hexits in the fraction to f, with the
+ // exception of the left most, which should be 0.
+ // APInt has setHighBits, but no corresponding clearHighBits
+ numerator.fraction.clearAllBits();
+ numerator.fraction.setLowBits(numerator.fraction.getBitWidth() - 4);
+ return;
+ }
+
+ // check if numerator is 0. If it is, then the result is 0.
+ if (0 == numerator.sign)
+ return;
+
+ // bin-normalize the divisor
+ // The loop is safe, i.e., will terminate, because at this point
+ // we know the divisor is not zero.
+ // We assert for sanity.
+ // We also know that because the divisor is normalized the loop
+ // will not iterate more than 3 times.
+ // Numerator left-most bits are not lost, and simply shift into the carry
+ // hexit. We deal with that after the loop by renormalizing.
+ assert(!divisor.fraction.isZero() && "divisor unexpectedly zero");
+ t.clearAllBits();
+ t.setBit(t.getBitWidth() - 4 - 1); // the bit just below the carry hexit
+ while (divisor.fraction.ult(t)) {
+ numerator.fraction <<= 1; // shift both by the same amount so the ratio is unchanged
+ divisor.fraction <<= 1;
+ }
+
+ // Check if we need to renormalize the numerator
+ t.clearAllBits();
+ t.setBit(t.getBitWidth() - 4); // low bit of highest hexit, i.e., the carry
+ if (numerator.fraction.uge(t)) {
+ numerator.fraction = numerator.fraction.lshr(4);
+ numerator.exponent++;
+ }
+
+ int extend_to = 2 * (numerator.fraction.getBitWidth() - 4) + 4; // width used for the division
+ int extend_by = extend_to - numerator.fraction.getBitWidth(); // number of bits added to each operand
+ APInt top = numerator.fraction.zext(extend_to);
+ APInt bottom = divisor.fraction.zext(extend_to);
+ top <<= extend_by; // pre-scale the numerator so the integer quotient keeps fraction bits
+ APInt q = top.udiv(bottom);
+
+ numerator.sign = numerator.sign * divisor.sign;
+ numerator.exponent = numerator.exponent - divisor.exponent;
+ numerator.fraction = q.trunc(numerator.fraction.getBitWidth());
+}
+
+opStatus HexFloat::add(const HexFloat &RHS, roundingMode RM) {
+  // Unpack both operands and bring them to a common exponent. RM is not
+  // consulted, and opOK is always returned.
+  HexFloatArith::value_t acc, addend;
+  HexFloatArith::fetch(*this, acc);
+  HexFloatArith::fetch(RHS, addend);
+  HexFloatArith::align(acc, addend);
+
+  // Non-zero operands with opposite signs are handled as a subtraction of
+  // magnitudes; every other combination is a plain addition.
+  const bool bothNonZero = (acc.sign != 0) && (addend.sign != 0);
+  const bool oppositeSigns = bothNonZero && (acc.sign != addend.sign);
+  if (!oppositeSigns) {
+    HexFloatArith::add(acc, addend);
+  } else {
+    addend.sign = -addend.sign;
+    HexFloatArith::sub(acc, addend);
+  }
+
+  HexFloatArith::norm(acc);
+  HexFloatArith::putres(acc, *this);
+  return opOK;
+}
+
+opStatus HexFloat::subtract(const HexFloat &RHS, roundingMode RM) {
+  // Unpack both operands and bring them to a common exponent. RM is not
+  // consulted, and opOK is always returned.
+  HexFloatArith::value_t acc, subtrahend;
+  HexFloatArith::fetch(*this, acc);
+  HexFloatArith::fetch(RHS, subtrahend);
+  HexFloatArith::align(acc, subtrahend);
+
+  // Subtracting a value of the opposite sign is an addition of magnitudes
+  // that keeps the left operand's sign; otherwise subtract the magnitudes.
+  const bool bothNonZero = (acc.sign != 0) && (subtrahend.sign != 0);
+  const bool oppositeSigns = bothNonZero && (acc.sign != subtrahend.sign);
+  if (!oppositeSigns) {
+    HexFloatArith::sub(acc, subtrahend);
+  } else {
+    subtrahend.sign = acc.sign;
+    HexFloatArith::add(acc, subtrahend);
+  }
+
+  HexFloatArith::norm(acc);
+  HexFloatArith::putres(acc, *this);
+  return opOK;
+}
+
+opStatus HexFloat::multiply(const HexFloat &RHS, roundingMode RM) {
+  // Unpack and normalize each operand, multiply, then normalize the product
+  // and store it back into *this. RM is not consulted, and opOK is always
+  // returned.
+  HexFloatArith::value_t product, factor;
+
+  HexFloatArith::fetch(*this, product);
+  HexFloatArith::norm(product);
+
+  HexFloatArith::fetch(RHS, factor);
+  HexFloatArith::norm(factor);
+
+  HexFloatArith::mult(product, factor);
+  HexFloatArith::norm(product);
+  HexFloatArith::putres(product, *this);
+  return opOK;
+}
+
+opStatus HexFloat::divide(const HexFloat &RHS, roundingMode RM) {
+  // Unpack and normalize each operand, divide, then normalize the quotient
+  // and store it back into *this. RM is not consulted, and opOK is always
+  // returned.
+  HexFloatArith::value_t quotient, denom;
+
+  HexFloatArith::fetch(*this, quotient);
+  HexFloatArith::norm(quotient);
+
+  HexFloatArith::fetch(RHS, denom);
+  HexFloatArith::norm(denom);
+
+  HexFloatArith::divide(quotient, denom);
+  HexFloatArith::norm(quotient);
+  HexFloatArith::putres(quotient, *this);
+  return opOK;
+}
+
+opStatus HexFloat::remainder(const HexFloat &RHS) { // placeholder: not implemented, RHS is unused
+ llvm_unreachable("TODO: HexFloat::remainder not yet implemented");
+}
+
+opStatus HexFloat::mod(const HexFloat &RHS) { // placeholder: not implemented, RHS is unused
+ llvm_unreachable("TODO: HexFloat::mod not yet implemented");
+}
+
+opStatus HexFloat::fusedMultiplyAdd(const HexFloat &Multiplicand,
+ const HexFloat &Addend, roundingMode RM) {
+ auto widen = [](HexFloatArith::value_t &v) { // double the fraction width, keeping the value in the high half
+ int widthIn = v.fraction.getBitWidth();
+ int widthOut = 2 * widthIn;
+ v.fraction = v.fraction.zext(widthOut);
+ v.fraction <<= (widthOut - widthIn);
+ };
+
+ auto narrow = [](HexFloatArith::value_t &v) { // inverse of widen: keep only the high half
+ int widthIn = v.fraction.getBitWidth();
+ int widthOut = widthIn / 2;
+ v.fraction = v.fraction.getHiBits(widthOut).trunc(widthOut);
+ };
+
+ HexFloatArith::value_t m1, m2, a;
+
+ // fetch operands: m1 = *this, m2 = Multiplicand, a = Addend
+ HexFloatArith::fetch(*this, m1);
+ HexFloatArith::fetch(Multiplicand, m2);
+ HexFloatArith::fetch(Addend, a);
+
+ // extend operands to double width for the intermediate arithmetic
+ widen(m1);
+ widen(m2);
+ widen(a);
+
+ // normalize
+ HexFloatArith::norm(m1);
+ HexFloatArith::norm(m2);
+ HexFloatArith::norm(a);
+
+ // Multiply and normalize. (Need normalize to prevent precision loss
+ // during forthcoming add.)
+ HexFloatArith::mult(m1, m2);
+ HexFloatArith::norm(m1);
+
+ // align in preparation for the add, recording lost 1s in the sticky bit
+ HexFloatArith::align(m1, a, /*sticky=*/true);
+
+ // now do the addition (a subtraction of magnitudes when the signs differ)
+ if ((m1.sign != 0) && (a.sign != 0) && (m1.sign != a.sign)) {
+ a.sign = -a.sign;
+ HexFloatArith::sub(m1, a);
+ } else {
+ HexFloatArith::add(m1, a);
+ }
+ // normalize
+ HexFloatArith::norm(m1);
+
+ // narrow the result back to the single-width working form
+ narrow(m1);
+
+ // unpack into *this; RM is not consulted and opOK is always returned
+ HexFloatArith::putres(m1, *this);
+
+ return opOK;
+}
+
+APFloat::fltCategory HexFloat::getCategory() const {
+  // HexFloat has neither NaN nor Infinity, so a value is either zero or
+  // reported as normal.
+  return isZero() ? fcZero : fcNormal;
+}
+
+void HexFloat::makeZero(bool Neg) {
+  // Both exponents are set to -64 so that, once the bias is added, the
+  // biased field is 0. The encoded representation is then all zeros apart
+  // from the sign bit, which may be one to represent -0.0.
+  significand.clearAllBits();
+  exponent = -64;
+  low_exponent = -64;
+  low_sign = 0;
+  sign = Neg ? 1 : 0;
+}
+
+void HexFloat::makeInf(bool Neg) {
+ // HexFloat has no special indicator for infinity; use the largest value
+ makeLargest(Neg);
+}
+
+void HexFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
+ // HexFloat doesn't have the concept of NaN, but we need to do something,
+ // if for no other reason than preventing the unittests from failing.
+ // FIXME: ideally the APFloat API should have predicates that indicate
+ // whether a floating point class supports NaN, and other concepts.
+ makeZero(false); // SNaN, Neg and fill are ignored; the result is always +0
+}
+
+void HexFloat::makeLargest(bool Neg) { // NOTE(review): low_sign/low_exponent are not updated here -- confirm this is intended for HexFP128
+ sign = Neg ? 1 : 0;
+ exponent = semantics->maxExponent; // largest exponent the semantics allow
+ significand.setAllBits(); // every bit of the significand set
+}
+
+void HexFloat::makeSmallest(bool Neg) {
+  // This is a denormal: the minimum exponent with only the lowest
+  // significand bit set.
+  significand.clearAllBits();
+  significand.setBit(0);
+  exponent = semantics->minExponent;
+  sign = Neg ? 1 : 0;
+}
+
+void HexFloat::makeSmallestNormalized(bool Neg) {
+  // The minimum exponent with only the low bit of the top hexit set, i.e. a
+  // leading hexit of 1.
+  const unsigned lowBitOfTopHexit = getNumPrecisionBits() - 4;
+  significand.clearAllBits();
+  significand.setBit(lowBitOfTopHexit);
+  exponent = semantics->minExponent;
+  sign = Neg ? 1 : 0;
+}
+
+bool HexFloat::needsCleanup() const {
+ // All our members but significand are trivially destructible, so
+ // needsCleanup depends only on significand.
+ return significand.needsCleanup();
+}
+
+opStatus HexFloat::roundToIntegral(roundingMode RM) {
+ if (significand.isZero()) {
+ return opOK;
+ }
+
+ // Check to see if the value is already an integer.
+ // If the exponent is at least as large as the number of
+ // hexits in the significand then the value is definitely
+ // an integer.
+ // If the exponent is less than the maximum hexit width,
+ // the value could be an integer if it has trailing zeros.
+ int hexitWidth = significand.getBitWidth() / 4; // max possible hexits
+ int hexitTrailingZeros = significand.countr_zero() / 4;
+
+ // we know that the significand is not zero, whch means that
+ // the number of trailing zero hexits must be strictly less
+ // than the maximum number of hexits.
+ assert(hexitTrailingZeros < hexitWidth);
+
+ if (exponent >= (hexitWidth - hexitTrailingZeros))
+ return opOK;
+
+ // Now the non-trivial cases.
+
+ if (exponent < 1) {
+ // then the absolute value must be strictly less than one.
+ // THerefore the rounded value will be -1, 0, or 1,
+ // depending on the mode and the value.
+ // We also know that the significand is not zero
+ auto makeOne = [&]() {
+ // sign stays what it was
+ // 1 has an exponent of 1, and the top hexit is 1,
+ // which in binary is 0001.
+ exponent = 1;
+ significand.clearAllBits();
+ significand.setBit(significand.getBitWidth() - 4);
+ };
+
+ // for an exponent < 1, half must be represented by
+ // exponent = 0, and only the top bit of the significand set.
+ // We will need this constant for resolving ties for some modes.
+ APInt halfSignificand(significand);
+ halfSignificand.clearAllBits();
+ halfSignificand.setBit(halfSignificand.getBitWidth() - 1);
+
+ switch (RM) {
+ case rmTowardZero:
+ makeZero(sign);
+ break;
+ case rmTowardPositive:
+ if (sign) {
+ // negative, so rounding up comes to zero
+ makeZero(sign);
+ } else {
+ // make one
+ makeOne();
+ }
+ break;
+ case rmTowardNegative:
+ if (sign) {
+ // negative, so round to -1
+ makeOne();
+ } else {
+ makeZero(sign);
+ }
+ break;
+ case rmNearestTiesToEven:
+ if ((exponent < 0) || significand.ule(halfSignificand)) {
+ // if exponent < 0, then the absolute value must be less than a half,
+ // so we round to zero.`.
+ // if the exponent is not less than zero, then it must be 0 because
+ // we know that it is less than 1.
+ // If the exponent is 0, then we want to round to 0 if the value is a
+ // half.
+ makeZero(sign);
+ } else {
+ assert(exponent == 0);
+ makeOne();
+ }
+ break;
+ case rmNearestTiesToAway:
+ if ((exponent < 0) || significand.ult(halfSignificand)) {
+ makeZero(sign);
+ } else {
+ assert(exponent == 0);
+ makeOne();
+ }
+ break;
+ default:
+ llvm_unreachable("unknwon rounding mode");
+ }
+ return opInexact;
+ }
+
+ // From here on the exponent must >= 1
+ assert(exponent >= 1);
+
+ // Because of the tests above, the exponent must be strictly
+ // less than the number of hexits in the significand
+ assert(exponent < hexitWidth);
+
+ // We also know this:
+ assert(exponent < (hexitWidth - hexitTrailingZeros));
+
+ // find the position of the radix point
+ // An exponent >= 1 means that there is at least one hexit
+ // to the left of the radix point.
+ // From the tests above, we also know that there must be
+ // at least one hexit to the right of the radix point.
+ int hexitsToRightOfPoint = hexitWidth - exponent;
+ assert(hexitsToRightOfPoint >= 1);
+ assert(hexitsToRightOfPoint < hexitWidth);
+
+ APInt fractionMask(significand);
+ fractionMask.clearAllBits();
+ fractionMask.setLowBits(4 * hexitsToRightOfPoint);
+
+ APInt integerMask(fractionMask);
+ integerMask.flipAllBits();
+
+ APInt fraction(significand);
+ fraction &= fractionMask;
+
+ APInt topBitOfFractionMask(fractionMask);
+ topBitOfFractionMask.clearLowBits(4 * hexitsToRightOfPoint - 1);
+
+ APInt bottomBitOfIntegerMask(topBitOfFractionMask);
+ bottomBitOfIntegerMask <<= 1;
+
+ APInt integer(significand);
+ integer &= integerMask;
+
+ if (!fraction.isZero()) {
+ // only have anyting to do if the fraction part is not zero
+ auto increment = [RM](HexFloat *lhs, const APInt &rhs) {
+ HexFloat other(*lhs);
+ other.significand = rhs;
+ other.sign = 0;
+ lhs->add(other, RM);
+ };
+ auto decrement = [RM](HexFloat *lhs, const APInt &rhs) {
+ HexFloat other(*lhs);
+ other.significand = rhs;
+ other.sign = 0;
+ lhs->subtract(other, RM);
+ };
+ switch (RM) {
+ case rmTowardZero:
+ // truncate away everything to right of radix point
+ significand = integer;
+ break;
+ case rmTowardPositive:
+ // negative: -11.3 ==> -11, and -13.7 ==> -13.
+ // i.e., truncate away the fraction
+ // positive: 11.3 ==> 12, and 13.7 ==> 14
+ // i.e., truncate away the fraction, and increment integer;
+ // In either case, truncate away the fraction
+ significand = integer;
+ if (!sign) {
+ // i.e., positive
+ increment(this, bottomBitOfIntegerMask);
+ }
+ break;
+ case rmTowardNegative:
+ // negative: -11.3 ==> -12, and -13.7 ==> -14.
+ // i.e., truncate away the fraction, and decrement integer
+ // positive: 11.3 ==> 11, and 13.7 ==> 13
+ // i.e., truncate away the fraction
+ // In either case, truncate away the fraction.
+ significand = integer;
+ if (sign) {
+ // i.e., negative
+ decrement(this, bottomBitOfIntegerMask);
+ }
+ break;
+ case rmNearestTiesToEven:
+ if (sign) {
+ // -11.5 ==> -12 and -12.5 ==> -12
+ significand = integer;
+ if (fraction.ult(topBitOfFractionMask)) {
+ // e.g., -11.3, which goes to -11
+ } else if (fraction.ugt(topBitOfFractionMask)) {
+ // e.g., -12.7, which should go to -13
+ decrement(this, bottomBitOfIntegerMask);
+ } else if (integer.intersects(bottomBitOfIntegerMask)) {
+ // then we had a tie, and the interm result is odd.
+ // Need to subtract one.
+ decrement(this, bottomBitOfIntegerMask);
+ }
+ } else {
+ // 11.5 ==> 12 and 12.5 ==> 12
+ significand = integer;
+ if (fraction.ult(topBitOfFractionMask)) {
+ // e.g., 12.3 ==> 12
+ } else if (fraction.ugt(topBitOfFractionMask)) {
+ // e.g. 12.7 ==> 13
+ increment(this, bottomBitOfIntegerMask);
+ } else if (integer.intersects(bottomBitOfIntegerMask)) {
+ // have tie and interm integer is odd
+ increment(this, bottomBitOfIntegerMask);
+ }
+ }
+ break;
+ case rmNearestTiesToAway:
+ if (sign) {
+ significand = integer;
+ if (fraction.ult(topBitOfFractionMask)) {
+ // e.g., -11.4 ==> -11
+ } else {
+ // e.g., -11.5 ==> -12 and -11.6 ==> -12
+ decrement(this, bottomBitOfIntegerMask);
+ }
+ } else {
+ significand = integer;
+ if (fraction.ult(topBitOfFractionMask)) {
+ // e.g., 11.4 ==> 11
+ } else {
+ // e.g., 11.5 ==> 12 and 11.6 ==> 12
+ increment(this, bottomBitOfIntegerMask);
+ }
+ }
+ break;
+ default:
+ llvm_unreachable("unknown rounding mode");
+ }
+ }
+ // if the significand is zero, make us a canonical zero
+ if (significand.isZero())
+ makeZero(isNegative());
+
+ return opInexact;
+}
+
+ // Step this value to the adjacent representable value: upwards when
+ // nextDown is false, downwards when it is true.
+ //
+ // HexFloat has no special representations for NaN or infinity, so the only
+ // special cases are zero and the value of least magnitude. Stepping beyond
+ // the value of largest magnitude saturates through handleOverflow() rather
+ // than producing an exponent above 63. The value is assumed to be as
+ // normalized as possible on entry.
+ opStatus HexFloat::next(bool nextDown) {
+   // Move one unit in the last place away from zero.
+   auto growMagnitude = [&]() -> opStatus {
+     if (!significand.isAllOnes()) {
+       // the easy case --- no carry out of the significand
+       significand += 1;
+       return opOK;
+     }
+     // All bits are set, so the increment carries into the next hexit.
+     if (exponent >= 63)
+       // Already the largest magnitude: there is nothing larger to move to.
+       return handleOverflow(sign);
+     significand.clearAllBits();
+     // set the most significant hexit to 1.
+     significand.setBit(significand.getBitWidth() - 4);
+     exponent++;
+     return opOK;
+   };
+
+   // Move one unit in the last place towards zero. Callers handle zero and
+   // the smallest denormal, so the significand cannot become zero here.
+   auto shrinkMagnitude = [&]() {
+     const unsigned topHexitLowBit = significand.getBitWidth() - 4;
+     if (exponent > -64 && significand.isOneBitSet(topHexitLowBit)) {
+       // The significand is 0x10...0. Its neighbour towards zero lives in
+       // the next lower hexade, where the spacing is 16 times finer: that
+       // neighbour is all-ones with the exponent reduced by one. (Merely
+       // decrementing and shifting up would leave the low hexit as 0 and
+       // skip 15 representable values.) This is the exact inverse of the
+       // carry case in growMagnitude.
+       significand.setAllBits();
+       exponent--;
+       return;
+     }
+     significand -= 1;
+     assert(!significand.isZero() && "significand is unexpectedly zero");
+     // Only reachable with a zero top hexit if the input was not normalized;
+     // shift up by one hexit in that case, as before.
+     if (exponent > -64 && significand.countLeadingZeros() >= 4) {
+       significand <<= 4;
+       exponent--;
+     }
+   };
+
+   const bool negative = isNegative();
+
+   if (isZero()) {
+     // nextUp(0) is the smallest positive value, nextDown(0) the smallest
+     // negative value.
+     makeSmallest(/* is neg */ nextDown);
+     return opOK;
+   }
+
+   // nextUp of a negative value and nextDown of a positive value both move
+   // towards zero; the other two combinations move away from it.
+   const bool towardZero = (negative != nextDown);
+   if (!towardZero)
+     return growMagnitude();
+
+   if (isSmallest()) {
+     // Least magnitude: the next value is a zero that keeps the sign
+     // (IEEE says nextUp of the smallest negative value is -0).
+     makeZero(/* is neg */ negative);
+   } else {
+     shrinkMagnitude();
+   }
+   return opOK;
+ }
+
+ // Flip the sign; exponent and significand are left untouched.
+ void HexFloat::changeSign() {
+   sign = !sign;
+ }
+
+ // Convert this value in place to another HexFloat precision.
+ //
+ // toSemantics  must use the HexFloat layout; anything else is unreachable.
+ // rounding_mode is currently unused: narrowing truncates without rounding.
+ // losesInfo    if non-null, receives true exactly when narrowing discarded
+ //              non-zero significand bits.
+ //
+ // Returns opInexact when information was lost and opOK otherwise, so the
+ // status agrees with the value reported through losesInfo.
+ opStatus HexFloat::convert(const fltSemantics &toSemantics,
+                            roundingMode rounding_mode, bool *losesInfo) {
+   if (!APFloat::usesLayout<HexFloat>(toSemantics))
+     llvm_unreachable("attempting to convert HexFloat to something else");
+
+   const int from_bits_precision = getNumPrecisionBits(semantics);
+   const int to_bits_precision = getNumPrecisionBits(&toSemantics);
+   bool lostInfo = false;
+
+   if (to_bits_precision != from_bits_precision) {
+     if (significand.isZero()) {
+       // zero stays zero; only the width of the significand changes
+       significand = APInt(to_bits_precision, 0);
+       low_sign = 0;
+       low_exponent = -64;
+     } else if (to_bits_precision > from_bits_precision) {
+       // widening: keep the value left-aligned in the wider significand
+       significand = significand.zext(to_bits_precision);
+       significand <<= (to_bits_precision - from_bits_precision);
+       low_sign = 0;
+       low_exponent = -64;
+     } else {
+       // narrowing: the low bits are dropped. The significand is known to
+       // be non-zero here, so countTrailingZeros() is below the bit width.
+       const int droppedBits = from_bits_precision - to_bits_precision;
+       const int num_trailing_zeros = significand.countTrailingZeros();
+       lostInfo = num_trailing_zeros < droppedBits;
+       // truncate to the new width without rounding
+       significand = significand.lshr(droppedBits).trunc(to_bits_precision);
+     }
+   }
+
+   // finally, update the semantics
+   semantics = &toSemantics;
+
+   if (losesInfo)
+     *losesInfo = lostInfo;
+   return lostInfo ? opInexact : opOK;
+ }
+
+ // Signed three-way comparison of *this against rhs. Both operands must
+ // share the same semantics and are expected to be normalized. Two zeros
+ // compare equal regardless of their signs.
+ cmpResult HexFloat::compare(const HexFloat &rhs) const {
+   assert(semantics == rhs.semantics);
+
+   if (isZero() && rhs.isZero())
+     return cmpEqual;
+
+   // Differing signs decide the ordering without looking at magnitudes.
+   if (sign != rhs.sign)
+     return sign ? cmpLessThan : cmpGreaterThan;
+
+   // Same sign: order by magnitude, mirrored when both are negative.
+   const cmpResult magnitudeOrder = compareAbsoluteValue(rhs);
+   if (!sign)
+     return magnitudeOrder;
+
+   switch (magnitudeOrder) {
+   case cmpLessThan:
+     return cmpGreaterThan;
+   case cmpGreaterThan:
+     return cmpLessThan;
+   default:
+     return magnitudeOrder;
+   }
+ }
+
+ // Compare magnitudes only, ignoring the signs. The exponents are compared
+ // first; the significands break ties with an unsigned comparison.
+ cmpResult HexFloat::compareAbsoluteValue(const HexFloat &rhs) const {
+   assert(semantics == rhs.semantics);
+
+   if (exponent != rhs.exponent)
+     return exponent > rhs.exponent ? cmpGreaterThan : cmpLessThan;
+
+   /* Exponents are equal: do an unsigned bignum comparison of the
+      significands. */
+   if (significand.ugt(rhs.significand))
+     return cmpGreaterThan;
+   if (significand.ult(rhs.significand))
+     return cmpLessThan;
+   return cmpEqual;
+ }
+
+ // True when both objects have the same semantics, sign, exponent and
+ // significand. For HexFP128 the sign and exponent of the low part must
+ // match as well.
+ bool HexFloat::bitwiseIsEqual(const HexFloat &other) const {
+   if (this == &other)
+     return true;
+
+   const bool sameHeader = semantics == other.semantics &&
+                           sign == other.sign && exponent == other.exponent;
+   if (!sameHeader)
+     return false;
+
+   const bool hasLowPart = semantics == &APFloatBase::HexFP128();
+   if (hasLowPart &&
+       (low_sign != other.low_sign || low_exponent != other.low_exponent))
+     return false;
+
+   return significand == other.significand;
+ }
+
+ // Decide whether a truncated magnitude must be bumped by one unit.
+ //
+ // sign          non-zero when the value is negative
+ // fraction      the bits the value was truncated from
+ // RM            rounding mode to honour
+ // lost_fraction classification of the discarded part; never lfExactlyZero
+ // bit           index in fraction of the lowest retained bit, consulted
+ //               only to break ties to even
+ bool HexFloat::roundAwayFromZero(int sign, const APInt &fraction,
+                                  roundingMode RM, lostFraction lost_fraction,
+                                  int bit) {
+   /* Current callers never pass this so we don't handle it. */
+   assert(lost_fraction != lfExactlyZero);
+
+   if (RM == rmTowardZero)
+     return false;
+   if (RM == rmTowardPositive)
+     return !sign;
+   if (RM == rmTowardNegative)
+     return sign;
+   if (RM == rmNearestTiesToAway)
+     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
+   if (RM == rmNearestTiesToEven) {
+     if (lost_fraction == lfMoreThanHalf)
+       return true;
+     // On a tie, round away only if the retained part is odd.
+     if (lost_fraction == lfExactlyHalf)
+       return APInt::tcExtractBit(fraction.getRawData(), bit);
+     return false;
+   }
+
+   llvm_unreachable("Invalid rounding mode found");
+ }
+
+ // Convert this value to an integer of the given bit width, written to
+ // output as a two's-complement number.
+ //
+ // output   destination parts; must hold at least partCountForBits(width)
+ // width    bit width of the destination integer
+ // isSigned whether the destination is a signed integer
+ // RM       rounding mode applied to the discarded fractional bits
+ // isExact  if non-null, set on every exit path (through the OnExit helper)
+ //          to whether the conversion was exact
+ //
+ // Returns opOK for an exact conversion, opInexact when a non-zero fraction
+ // was discarded, and opInvalidOp when the result does not fit. On
+ // opInvalidOp the contents of output are whatever had been computed so far.
+ opStatus
+ HexFloat::convertToSignExtendedInteger(MutableArrayRef<integerPart> output,
+ unsigned int width, bool isSigned,
+ roundingMode RM, bool *isExact) const {
+ // Writes the final exactness to *isExact when the function returns.
+ class OnExit {
+ bool *IsExact;
+
+ public:
+ bool Exact = false;
+ OnExit(bool *isExact) : IsExact(isExact) {}
+ ~OnExit() {
+ if (IsExact)
+ *IsExact = Exact;
+ }
+ };
+ OnExit OnExit(isExact);
+
+ unsigned int dstPartsCount = partCountForBits(width);
+ assert(dstPartsCount <= output.size() &&
+ "Integer width too large for output");
+
+ if (significand.isZero()) {
+ // then we don't care about the exponent
+ // A negative zero is reported as not exact.
+ OnExit.Exact = !isNegative();
+ APInt::tcSet(output.data(), 0, dstPartsCount);
+ return opOK;
+ }
+
+ // we can treat the significand as a binary fraction,
+ // and adjust the exponent
+ APInt fraction(significand);
+ int e = exponent * 4; // base 16 to base 2
+
+ // normalize fraction to base 2
+ // the loop must terminate because we know that fraction is not zero
+ while (!fraction.isSignBitSet()) {
+ fraction <<= 1;
+ e--;
+ }
+ assert(fraction.isSignBitSet() &&
+ "top bit unexpectedly not set after normalization");
+
+ // Step 1: place the absolute value of result in output,
+ // truncating as required
+ int truncatedBits;
+ if (e < 1) {
+ // then we have a value less than one
+ APInt::tcSet(output.data(), 0, dstPartsCount);
+ // the number of bits truncated away is nbits plus abs(e)
+ truncatedBits = fraction.getBitWidth() - e;
+ } else {
+ // The integer part of the value is the top e bits of the fraction
+
+ // check if too large
+ const unsigned int ue =
+ (unsigned int)e; // ease comparisons since we know e >= 0
+ if (ue > width)
+ return opInvalidOp;
+ if (ue < fraction.getBitWidth()) {
+ // Part of the fraction lies to the right of the binary point:
+ // keep the top e bits and remember how many were dropped.
+ truncatedBits = fraction.getBitWidth() - e;
+ APInt::tcExtract(output.data(), dstPartsCount, fraction.getRawData(), e,
+ truncatedBits);
+ } else {
+ // The whole fraction is integral: copy it and scale up by the
+ // remaining power of two. Nothing is truncated.
+ APInt::tcExtract(output.data(), dstPartsCount, fraction.getRawData(),
+ fraction.getBitWidth(), 0);
+ APInt::tcShiftLeft(output.data(), dstPartsCount,
+ e - fraction.getBitWidth());
+ truncatedBits = 0;
+ }
+ }
+
+ // Step 2: work out the lost fraction, and adjust the result as
+ // dictated by the rounding mode
+ assert(truncatedBits >= 0 && "truncatedButs unexpectedly negative");
+ lostFraction lost_fraction;
+ if (truncatedBits) {
+ lost_fraction = lostFractionThroughTruncation(
+ fraction.getRawData(), fraction.getNumWords(), truncatedBits);
+ // truncatedBits doubles as the index of the lowest retained bit, which
+ // roundAwayFromZero consults to break ties to even.
+ if (lost_fraction != lfExactlyZero &&
+ roundAwayFromZero(sign, fraction, RM, lost_fraction, truncatedBits)) {
+
+ if (APInt::tcIncrement(output.data(), dstPartsCount))
+ return opInvalidOp; /* Overflow */
+ }
+ } else {
+ lost_fraction = lfExactlyZero;
+ }
+
+ // Step 3: check result fits in destination
+ // omsb is the number of bits needed to hold the magnitude now in output.
+ unsigned int omsb = APInt::tcMSB(output.data(), dstPartsCount) + 1;
+ if (sign) {
+ if (!isSigned) {
+ // converting negative floating point to unsigned integer
+ // Only a magnitude that rounded to zero is acceptable.
+ if (!APInt::tcIsZero(output.data(), dstPartsCount))
+ return opInvalidOp;
+ } else {
+ // converting negative floating point to signed integer
+
+ // It takes omsb bits to represent the unsigned integer value.
+ // We lose a bit for the sign. Thus if the top bit is set
+ // we can't represent the negative value, unless the value
+ // is -2^n (assuming 2s-complement).
+ unsigned int lsb = APInt::tcLSB(output.data(), dstPartsCount) + 1;
+ if (omsb == width && lsb != omsb)
+ // top bit is set, and there is at least one other set bit.
+ // Hence the negative of this value cannot be represented in
+ // width bits
+ return opInvalidOp;
+
+ if (omsb > width)
+ // this case can happen because of rounding
+ return opInvalidOp;
+ }
+ APInt::tcNegate(output.data(), dstPartsCount);
+ } else {
+ // positive floating point
+ if ((isSigned && (omsb >= width)) || (!isSigned && (omsb >= (width + 1))))
+ // value is too large
+ return opInvalidOp;
+ }
+
+ if (lost_fraction == lfExactlyZero) {
+ OnExit.Exact = true;
+ return opOK;
+ }
+ OnExit.Exact = false;
+ return opInexact;
+ }
+
+ // Convert to an integer of the given width. When the value does not fit
+ // (opInvalidOp), output is replaced by a saturated result: the minimum
+ // integer for negative values (zero when unsigned) and the maximum integer
+ // for positive values.
+ opStatus HexFloat::convertToInteger(MutableArrayRef<integerPart> output,
+                                     unsigned int width, bool isSigned,
+                                     roundingMode RM, bool *isExact) const {
+   const opStatus status =
+       convertToSignExtendedInteger(output, width, isSigned, RM, isExact);
+   if (status != opInvalidOp)
+     return status;
+
+   const unsigned int dstPartsCount = partCountForBits(width);
+   assert(dstPartsCount <= output.size() && "Integer too big");
+
+   const bool negative = isNegative();
+   // Number of low one-bits to start from: a single bit for a negative
+   // signed result (moved to the sign position below), none for a negative
+   // unsigned result, and every non-sign bit for a positive result.
+   const unsigned int bits = negative ? isSigned : width - isSigned;
+   tcSetLeastSignificantBits(output.data(), dstPartsCount, bits);
+   if (negative && isSigned)
+     APInt::tcShiftLeft(output.data(), dstPartsCount, width - 1);
+   return status;
+ }
+
+ // Set this value from an integer.
+ //
+ // input    the integer to convert
+ // isSigned whether input is to be read as a two's-complement signed value
+ // RM       rounding mode used when input has more significant bits than
+ //          the significand can hold
+ //
+ // Returns opOK when the integer is represented exactly and opInexact when
+ // non-zero bits had to be rounded away. If the magnitude exceeds the
+ // largest exponent the value saturates through handleOverflow().
+ opStatus HexFloat::convertFromAPInt(const APInt &input, bool isSigned,
+                                     roundingMode RM) {
+   if (input.isZero()) {
+     makeZero(false);
+     return opOK;
+   }
+
+   APInt api(input); // working copy, holds the magnitude from here on
+   int s = 0;        // working copy of the sign
+   if (isSigned && api.isNegative()) {
+     s = 1;
+     api = -api;
+   }
+
+   // round up size of api to be multiple of 4,
+   // i.e., a whole number of hexits
+   if (auto r = api.getBitWidth() % 4)
+     api = api.zext(api.getBitWidth() + 4 - r);
+
+   // Put the radix point to the left of api, then normalize by dropping
+   // leading zero hexits. api is non-zero, so at least one hexit remains.
+   int e = api.getBitWidth() / 4;
+   const unsigned leadingZeroHexits = api.countl_zero() / 4;
+   api <<= 4 * leadingZeroHexits;
+   e -= leadingZeroHexits;
+
+   bool inexact = false;
+   const int delta_width = api.getBitWidth() - significand.getBitWidth();
+   if (delta_width > 0) {
+     // The integer has more bits than the HexFloat has precision. The low
+     // delta_width bits are always dropped; first adjust the retained part
+     // as required by the rounding mode.
+     const APInt discard = api.getLoBits(delta_width);
+     if (!discard.isZero()) {
+       inexact = true;
+
+       // half a unit in the last retained place
+       APInt half(api.getBitWidth(), 0);
+       half.setBit(delta_width - 1);
+
+       bool roundUp = false;
+       switch (RM) {
+       case rmNearestTiesToEven:
+         // on a tie, round up only when the retained part is odd
+         roundUp = discard.ugt(half) || (discard == half && api[delta_width]);
+         break;
+       case rmNearestTiesToAway:
+         roundUp = discard.uge(half);
+         break;
+       case rmTowardPositive:
+         roundUp = !s;
+         break;
+       case rmTowardNegative:
+         roundUp = s;
+         break;
+       case rmTowardZero:
+       default:
+         // just truncate
+         break;
+       }
+
+       if (roundUp) {
+         bool overFlow = false;
+         APInt ulp(api.getBitWidth(), 0);
+         ulp.setBit(delta_width);
+         api = api.uadd_ov(ulp, overFlow);
+         if (overFlow) {
+           // The carry left the top hexit: renormalize to 0x1 followed by
+           // zeros, one hexit higher.
+           api = api.lshr(4);
+           api.setBit(api.getBitWidth() - 4);
+           e++;
+         }
+       }
+     }
+     // Now do the truncation
+     api = api.lshr(delta_width).trunc(significand.getBitWidth());
+   } else if (delta_width < 0) {
+     // APInt::zext extends on the left, so shift the value back up to keep
+     // it left-aligned in the significand.
+     api = api.zext(significand.getBitWidth());
+     api <<= -delta_width;
+   }
+   assert(api.getBitWidth() == significand.getBitWidth());
+
+   // A non-zero integer always has at least one hexit left of the radix
+   // point, so only the upper exponent bound can be violated.
+   assert(e >= 1 && "non-zero integer must have a positive exponent");
+   if (e > 63)
+     return handleOverflow(s);
+
+   sign = s;
+   exponent = e;
+   significand = api;
+   return inexact ? opInexact : opOK;
+ }
+
+ // Not implemented for HexFloat; reaching this function is a programming
+ // error.
+ opStatus
+ HexFloat::convertFromSignExtendedInteger(const integerPart *input,
+                                          unsigned int inputSize,
+                                          bool isSigned, roundingMode RM) {
+   llvm_unreachable("HexFloat::convertFromSignExtendedInteger not implemented");
+ }
+
+ // Not implemented for HexFloat; reaching this function is a programming
+ // error.
+ opStatus
+ HexFloat::convertFromZeroExtendedInteger(const integerPart *input,
+                                          unsigned int inputSize,
+                                          bool isSigned, roundingMode RM) {
+   llvm_unreachable("HexFloat::convertFromZeroExtendedInteger not implemented");
+ }
+
+ // Parse a textual floating point literal into this value.
+ //
+ // An optional leading '+' or '-' is consumed here and recorded in the sign.
+ // A "0x"/"0X" prefix selects the hexadecimal parser; everything else goes
+ // to the decimal parser. Returns an error for an empty string or a string
+ // that consists of a sign only.
+ Expected<opStatus> HexFloat::convertFromString(StringRef str,
+                                                roundingMode rounding_mode) {
+   assert(APFloat::usesLayout<HexFloat>(*semantics) && "Unexpected Semantics");
+
+   if (str.empty())
+     return createError("Invalid string length");
+
+   StringRef::iterator p = str.begin();
+   size_t slen = str.size();
+
+   /* Handle a leading sign. */
+   const char lead = *p;
+   sign = (lead == '-') ? 1 : 0;
+   if (lead == '-' || lead == '+') {
+     ++p;
+     --slen;
+     if (slen == 0)
+       return createError("String has no digits");
+   }
+
+   const bool hasHexPrefix =
+       slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X');
+   if (!hasHexPrefix)
+     return convertFromDecimalString(StringRef(p, slen), rounding_mode);
+
+   return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
+                                       rounding_mode);
+ }
+
+ // Saturate to the value of largest magnitude with the requested sign and
+ // report overflow together with inexact.
+ opStatus HexFloat::handleOverflow(bool isNeg) {
+   makeLargest(isNeg);
+   return static_cast<opStatus>(opOverflow | opInexact);
+ }
+
+ // Flush to a zero with the requested sign and report underflow together
+ // with inexact.
+ opStatus HexFloat::handleUnderflow(bool isNeg) {
+   makeZero(isNeg);
+   return static_cast<opStatus>(opUnderflow | opInexact);
+ }
+
+ // Bring an out-of-range exponent of a working significand back into
+ // [-64, 63] where possible.
+ //
+ // significand working significand, adjusted in place (note: this parameter
+ //             shadows the member of the same name)
+ // exponent    base 16 exponent, adjusted in place
+ //
+ // An exponent above 63 overflows. An exponent below -64 is raised one
+ // hexit at a time, shifting the significand right, for as long as the low
+ // eight bits of the significand are zero. If the exponent is still too
+ // small afterwards, or the top getNumPrecisionBits() bits are all zero,
+ // the value underflows. Returns opOK otherwise.
+ opStatus HexFloat::handleDenorm(APInt &significand, int &exponent) {
+   if (exponent > 63)
+     return handleOverflow(sign);
+
+   while (exponent < -64 && significand.getLoBits(8) == 0) {
+     // dividing by 16 moves the value down by exactly one hexit
+     significand = significand.lshr(4);
+     exponent += 1;
+   }
+
+   const bool exponentStillTooSmall = exponent < -64;
+   if (exponentStillTooSmall ||
+       significand.getHiBits(getNumPrecisionBits()).isZero())
+     return handleUnderflow(sign);
+
+   return opOK;
+ }
+
+ // Parse a hexadecimal literal (without sign and without the "0x" prefix)
+ // of the form 1234.567p89, where the radix point and the exponent are
+ // optional. The exponent after the "p" is a power of 2.
+ //
+ // Outline:
+ //  1. Read the significand as a hex integer, ignoring the radix point but
+ //     remembering where it is.
+ //  2. Shift the significand left so that the base 2 exponent becomes a
+ //     multiple of 4, then convert the exponent to base 16 (2^4 == 16).
+ //  3. Account for the radix point: every hexit to its right lowers the
+ //     exponent by one.
+ //  4. Normalize by shifting out leading zero hexits, and move the radix
+ //     point to the left end of the value, which raises the exponent by one
+ //     for every hexit in the value.
+ //  5. Round to the precision of the format using one extra bit.
+ //
+ // Some care is needed with the iterators: some point at an actual
+ // character and others one position to the right of it.
+ //
+ // Returns an error for malformed input, the status from handleDenorm() /
+ // handleOverflow() for out-of-range values, and opOK otherwise.
+ Expected<opStatus>
+ HexFloat::convertFromHexadecimalString(StringRef str,
+                                        RoundingMode rounding_mode) {
+   assert(APFloat::usesLayout<HexFloat>(*semantics) && "Unexpected Semantics");
+
+   significand.clearAllBits();
+   exponent = 0;
+   // use one additional hexit than the precision to perform rounding
+   unsigned size = std::max((unsigned)str.size() * 8, getNumPrecisionBits() + 4);
+   APInt tmpSignificand = APInt(size, 0);
+
+   // ensure that the size of the working significand is an exact number of
+   // hexits
+   assert((tmpSignificand.getBitWidth() % 4) == 0);
+
+   // Skip leading zeroes and any (hexa)decimal point.
+   StringRef::iterator begin = str.begin();
+   StringRef::iterator end = str.end();
+   StringRef::iterator dot = str.end();
+   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+   if (!PtrOrErr)
+     return PtrOrErr.takeError();
+   StringRef::iterator firstSigDigit = *PtrOrErr;
+
+   // lastSigDigit is one to the right of the actual last digit: either the
+   // position of the 'p' or the end of the string.
+   StringRef::iterator lastSigDigit = end;
+   size_t expPosition = str.find_insensitive("p");
+   if (expPosition != StringRef::npos)
+     lastSigDigit = begin + expPosition;
+
+   // if no radix point was seen so far, park dot on lastSigDigit for
+   // calculation purposes
+   if (dot == str.end())
+     dot = lastSigDigit;
+
+   // Read the significand as an integer, i.e. with the (implied) radix
+   // point immediately to the right of the value, keeping track of where
+   // the dot is so that the exponent can be adjusted later.
+   StringRef::iterator p = firstSigDigit;
+   for (; p != lastSigDigit; ++p) {
+     if (*p == '.') {
+       if (dot != lastSigDigit)
+         return createError("String contains multiple dots");
+       dot = p;
+       continue;
+     }
+     unsigned hex = hexDigitValue(*p);
+     if (hex == (unsigned)-1)
+       // Anything other than a hexit or a dot before the exponent marker is
+       // malformed; do not let the text after it pose as an exponent.
+       return createError("Invalid character in significand");
+     tmpSignificand <<= 4;
+     tmpSignificand += hex;
+   }
+
+   // if the significand is zero, it doesn't matter what the exponent is.
+   if (tmpSignificand.isZero()) {
+     makeZero(isNegative());
+     return opOK;
+   }
+
+   // At this point dot is either the actual position of the radix point or
+   // lastSigDigit, i.e. the value is an integer.
+
+   // Read the base 2 exponent; it stays 0 when there is no 'p'.
+   int exp = 0;
+   if (p != end) {
+     auto ExpOrErr = readExponent(p + 1, end);
+     if (!ExpOrErr)
+       return ExpOrErr.takeError();
+     exp = *ExpOrErr;
+   }
+
+   // Make the base 2 exponent a multiple of 4 by shifting the significand
+   // left by R = exp % 4 bits and lowering the exponent by R. Division
+   // truncates towards zero, so for a negative exponent R is negative and
+   // R + 4 is used instead.
+   //
+   // Examples:
+   //   x * 2^23  (R == 3):  x * 2^3 * 2^20  == (x << 3) * 2^20
+   //   x * 2^-23 (R == -3): x * 2^1 * 2^-24 == (x << 1) * 2^-24
+   if (int R = (exp % 4)) {
+     if (R < 0)
+       R += 4;
+     tmpSignificand <<= R;
+     exp -= R;
+   }
+
+   // At this point the exponent should be a multiple of 4
+   assert(0 == (exp % 4));
+
+   // convert to base 16 --- we can do this simply by dividing the exponent by 4
+   exp = exp / 4;
+
+   // Move the radix point to the right end of the digits. If there is no
+   // dot, there is no adjustment to make.
+   if (lastSigDigit != dot) {
+     // examples:
+     //  string index: 01234567   lastSigDigit   dot    shift right
+     //  ------------------------------------------------------------
+     //                1234.5p1        6           4        1
+     //                0.0012p4        6           1        4
+     exp -= (lastSigDigit - dot - 1);
+   }
+
+   // get rid of leading zeroes, shift left, and adjust exponent
+   unsigned leadingZeros = tmpSignificand.countl_zero() / 4;
+   tmpSignificand <<= leadingZeros * 4;
+   exp += (tmpSignificand.getBitWidth() / 4 - leadingZeros);
+
+   if (exp > 63 || exp < -64) {
+     opStatus fs = handleDenorm(tmpSignificand, exp);
+     if (fs != opOK)
+       return fs;
+   }
+
+   // check and perform rounding by checking the bit at precision + 1
+   unsigned excessBits =
+       tmpSignificand.getBitWidth() - getNumPrecisionBits() - 1;
+   if (tmpSignificand[excessBits]) {
+     APInt mask(tmpSignificand.getBitWidth(), 0);
+     mask.setBit(excessBits);
+     bool carry = false;
+     tmpSignificand = tmpSignificand.uadd_ov(mask, carry);
+     if (carry) {
+       // Every retained bit was set, so rounding carried out of the working
+       // significand and left zeros behind. The rounded value is 0x1
+       // followed by zeros, one hexit higher.
+       tmpSignificand.clearAllBits();
+       tmpSignificand.setBit(tmpSignificand.getBitWidth() - 4);
+       ++exp;
+       if (exp > 63)
+         return handleOverflow(sign);
+     }
+   }
+
+   significand = tmpSignificand.getHiBits(getNumPrecisionBits())
+                     .trunc(getNumPrecisionBits());
+   exponent = exp;
+
+   return opOK;
+ }
+
+ // Parse a decimal literal (without sign) such as 123.45e-6 into this value.
+ //
+ // The digits are accumulated exactly in a wide integer, which is then
+ // multiplied or divided by the power of ten given by the exponent. The
+ // result is normalized to whole hexits and rounded to the precision of the
+ // format using one extra bit.
+ //
+ // Returns an error for malformed input, the status from handleOverflow() /
+ // handleUnderflow() / handleDenorm() for out-of-range values, and opOK
+ // otherwise.
+ Expected<opStatus>
+ HexFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
+   assert(APFloat::usesLayout<HexFloat>(*semantics) && "Unexpected Semantics");
+   decimalInfo D;
+   significand.clearAllBits();
+   exponent = 0;
+
+   /* Scan the text. */
+   StringRef::iterator p = str.begin();
+   if (Error Err = interpretDecimal(p, str.end(), &D))
+     return std::move(Err);
+
+   /* Handle the quick cases. First the case of no significant digits,
+      i.e. zero, and then exponents that are obviously too large or too
+      small. With integer arithmetic the tightest bounds for
+      L = log 10 / log 2 are
+
+        93/28 < L < 196/59            [ numerator <= 256 ]
+        42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
+
+      The maximum HexFloat exponent is 63 and the maximum fraction is
+      1 - epsilon, so the largest representable value is below 16^63.
+      Solving 16^63 = 10^x gives x = 63 * log(16) / log(10) = 75.86.
+      Repeating this for the minimum exponent of -64 gives x = -77.06. */
+   int max10Exponent = 75;
+   int min10Exponent = -78;
+
+   // Test if we have a zero number allowing for strings with no null
+   // terminators and zero decimals with non-zero exponents.
+   //
+   // firstSigDigit was computed by ignoring all zeros and dots. Thus if it
+   // equals str.end(), every digit must be a zero and there can be at most
+   // one dot. On the other hand, if we have a zero with a non-zero exponent,
+   // then firstSigDigit will be non-numeric.
+   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
+     category = fcZero;
+     makeZero(sign);
+     return opOK;
+   }
+   /* Check whether the normalized exponent is high enough to overflow */
+   if (D.normalizedExponent > max10Exponent)
+     return handleOverflow(sign);
+   /* If it wasn't, then it also wasn't high enough to overflow max
+      during the log-rebasing in the min-exponent check. Check that it
+      won't overflow min */
+   if (D.normalizedExponent <
+       min10Exponent - (int)getNumPrecisionBits() * 59 / 196) {
+     /* Underflow to zero and round. */
+     category = fcNormal;
+     return handleUnderflow(sign);
+   }
+
+   /* A tight upper bound on number of bits required to hold an
+      N-digit decimal integer is N * 196 / 59. The bits required for a
+      multiplication of M * N bits is (M + N). Allocate enough space to hold
+      the full significand. */
+   unsigned decDigits =
+       static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
+   int exp = D.exponent > 0 ? D.exponent : -D.exponent;
+   unsigned size = (decDigits + exp + 1) * 196 / 59;
+   size += 4 - size % 4; // make sure size is divisible by 4 for hexits
+   // use one additional hexit than the precision to perform rounding
+   size = std::max(size, getNumPrecisionBits() + 4);
+   StringRef sig = StringRef(D.firstSigDigit, decDigits);
+
+   /* Build the significand from the string */
+   APInt tmpSignificand = APInt(size, 0);
+   int tmpExponent = 0;
+   for (StringRef::iterator digit = sig.begin(), sigEnd = sig.end();
+        digit != sigEnd; ++digit) {
+     if (*digit != '.') {
+       tmpSignificand *= 10;
+       tmpSignificand += *digit - '0';
+     }
+   }
+   /* Build the multiplier from the exponent
+    * 10^n = 5^n * 2^n */
+   APInt multiplier = APInt(size, 1);
+   for (int i = 0; i < exp; i++) {
+     multiplier *= 5;
+     multiplier <<= 1;
+   }
+   /* If the exponent is not negative, multiply the significand by the
+    * multiplier. If the exponent is negative then divide the significand by
+    * the multiplier. */
+   if (D.exponent >= 0)
+     tmpSignificand *= multiplier;
+   else {
+     /* Extend the values to 4x the bit width so when we perform division,
+      * there is no loss of precision. This also ensures the width is
+      * divisible by 4 so we can convert 2^n to 16^n */
+     int extend_to = 4 * tmpSignificand.getBitWidth();
+     int extend_by = extend_to - tmpSignificand.getBitWidth();
+     APInt top = tmpSignificand.zext(extend_to);
+     APInt bottom = multiplier.zext(extend_to);
+     top <<= extend_by;
+     tmpSignificand = top.udiv(bottom);
+     tmpExponent = -extend_by / 4;
+   }
+   // get rid of leading zeroes
+   unsigned leadingZeros = tmpSignificand.countl_zero() / 4;
+   tmpSignificand <<= leadingZeros * 4;
+   tmpExponent -= leadingZeros;
+
+   // move the radix point to the left end of the working significand
+   tmpExponent += tmpSignificand.getBitWidth() / 4;
+
+   if (tmpExponent > 63 || tmpExponent < -64) {
+     opStatus fs = handleDenorm(tmpSignificand, tmpExponent);
+     if (fs != opOK)
+       return fs;
+   }
+
+   // check and perform rounding by checking the bit at precision + 1
+   unsigned excessBits =
+       tmpSignificand.getBitWidth() - getNumPrecisionBits() - 1;
+   if (tmpSignificand[excessBits]) {
+     APInt mask(tmpSignificand.getBitWidth(), 0);
+     mask.setBit(excessBits);
+     bool carry = false;
+     tmpSignificand = tmpSignificand.uadd_ov(mask, carry);
+     if (carry) {
+       // Every retained bit was set, so rounding carried out of the working
+       // significand and left zeros behind. The rounded value is 0x1
+       // followed by zeros, one hexit higher.
+       tmpSignificand.clearAllBits();
+       tmpSignificand.setBit(tmpSignificand.getBitWidth() - 4);
+       ++tmpExponent;
+       if (tmpExponent > 63)
+         return handleOverflow(sign);
+     }
+   }
+
+   significand = tmpSignificand.getHiBits(getNumPrecisionBits())
+                     .trunc(getNumPrecisionBits());
+   exponent = tmpExponent;
+
+   return opOK;
+ }
+
+ // Produce the bit pattern of this value.
+ //
+ // The top byte holds the sign (bit 7) and the exponent biased by 64 (low
+ // seven bits), followed by the significand. HexFP128 is laid out as two
+ // 64-bit halves: the high half carries the upper 56 significand bits; the
+ // low half starts with the low part's own sign/exponent byte and carries
+ // the lower 56 significand bits.
+ APInt HexFloat::bitcastToAPInt() const {
+   const bool is128 = semantics == &APFloatBase::HexFP128();
+   const int NumPrecisionBits = getNumPrecisionBits();
+   const int width = NumPrecisionBits + (is128 ? 16 : 8);
+
+   auto packSignExponent = [](int s, int e) {
+     return s << 7 | ((e + 64) & 0x7f);
+   };
+
+   // start with the leading sign/exponent byte in the top eight bits
+   APInt Ret(width, packSignExponent(sign, exponent));
+   Ret <<= (width - 8);
+
+   if (!is128) {
+     Ret |= significand.zextOrTrunc(width);
+     return Ret;
+   }
+
+   APInt highHalf = significand.extractBits(56, 56).zext(width);
+   highHalf <<= 64;
+   APInt lowHeader(width, packSignExponent(low_sign, low_exponent));
+   lowHeader <<= 56;
+   APInt lowHalf = significand.extractBits(56, 0).zext(width);
+
+   Ret |= highHalf;
+   Ret |= lowHeader;
+   Ret |= lowHalf;
+   return Ret;
+ }
+
+ // Not supported for HexFloat; reaching this function is a programming
+ // error.
+ unsigned int HexFloat::convertToHexString(char *DST, unsigned int HexDigits,
+                                           bool upperCase,
+                                           roundingMode RM) const {
+   llvm_unreachable("HexFloat::convertToHexString nor supported");
+ }
+
+ // A zero has the minimum exponent (-64) and an all-zero significand; the
+ // sign is not considered.
+ bool HexFloat::isZero() const {
+   if (exponent != -64)
+     return false;
+   return significand.isZero();
+ }
+
+ // Reports the sign only. As with IEEE, zeros can be negative, so the
+ // exponent and the fraction are not consulted.
+ bool HexFloat::isNegative() const {
+   return sign;
+ }
+
+ // HexFloat appears to allow denormals at any exponent, not only at -64,
+ // so the test looks at the most significant hexit alone: the value counts
+ // as denormal when that hexit is zero. Note that an all-zero significand
+ // satisfies this test as well.
+ bool HexFloat::isDenormal() const {
+   const unsigned leadingZeroBits = significand.countLeadingZeros();
+   return leadingZeroBits >= 4;
+ }
+
+ // True for the non-zero value of least magnitude: minimum exponent and a
+ // significand equal to one. The sign is not considered.
+ bool HexFloat::isSmallest() const {
+   if (exponent != -64)
+     return false;
+   return significand.isOne();
+ }
+
+ // True for the value of greatest magnitude: maximum exponent and an
+ // all-ones significand. The sign is not considered.
+ bool HexFloat::isLargest() const {
+   if (exponent != 63)
+     return false;
+   return significand.isAllOnes();
+ }
+
+ // As for the IEEE case: the value is an integer when truncating it
+ // towards zero leaves it unchanged.
+ bool HexFloat::isInteger() const {
+   HexFloat truncated(*this);
+   truncated.roundToIntegral(rmTowardZero);
+   return cmpEqual == compare(truncated);
+ }
+
+bool HexFloat::isSmallestNormalized() const {
+  // The sign is ignored for this predicate --- see the corresponding IEEE
+  // method. The value must have exponent -64 and a significand whose only set
+  // bit is the lowest bit of the most significant hexit.
+  if (exponent != -64)
+    return false;
+  const int LowBitOfTopHexit = getNumPrecisionBits() - 4;
+  return significand.isOneBitSet(LowBitOfTopHexit);
+}
+
+/// Returns log2(|value|) when the value is an exact power of two, and INT_MIN
+/// otherwise (including for zero).
+int HexFloat::getExactLog2Abs() const {
+  // Zero, or a significand with anything other than exactly one bit set, is
+  // not a power of two.
+  if (isZero() || significand.popcount() != 1)
+    return INT_MIN;
+
+  // The exponent is to base 16, so the matching power of two is 4 * exponent.
+  // The significand is a fraction with no implicit bit: its left-most bit
+  // stands for 2^(-1), the next for 2^(-2), and so on.
+  const int FractionBitWeight = significand.countl_zero() + 1;
+  return 4 * exponent - FractionBitWeight;
+}
+
+/// Appends a decimal rendering of the value to \p str. Zero is formatted
+/// here; every other value is handed to toStringImpl.
+void HexFloat::toString(SmallVectorImpl<char> &str, unsigned precision,
+                        unsigned maxPadding, bool truncateZero) const {
+  if (!isZero()) {
+    // toStringImpl expects a base-2 exponent and an integer significand.
+    // 16^x == 2^(4x) converts the base, and subtracting the significand width
+    // moves the radix point to the right-hand end of the significand.
+    const int BinaryExponent = 4 * exponent - significand.getBitWidth();
+    toStringImpl(str, isNegative(), BinaryExponent, significand, precision,
+                 maxPadding, truncateZero);
+    return;
+  }
+
+  // Zero is a special case.
+  if (isNegative())
+    str.push_back('-');
+
+  if (maxPadding) {
+    str.push_back('0');
+    return;
+  }
+
+  if (truncateZero) {
+    append(str, "0.0E+0");
+    return;
+  }
+
+  append(str, "0.0");
+  if (precision > 1)
+    str.append(precision - 1, '0');
+  append(str, "e+00");
+}
+
+hash_code HexFloat::hash_value() const {
+  // Every zero hashes from the semantics alone, so the sign of a zero does
+  // not influence the hash.
+  if (isZero())
+    return hash_combine(semantics);
+
+  // Non-zero values mix the precision, sign, exponent and significand.
+  return hash_combine(getNumPrecisionBits(), (uint8_t)sign, exponent,
+                      ::hash_value(significand));
+}
+
+/// Returns the exponent \p Arg would have after normalization, less one, or
+/// IEK_Zero when \p Arg is zero.
+int ilogb(const HexFloat &Arg) {
+  if (Arg.isZero())
+    return APFloatBase::IEK_Zero;
+
+  // The significand may not be normalized, so every leading zero hexit
+  // lowers the result by one.
+  const int LeadingZeroHexits = Arg.significand.countLeadingZeros() / 4;
+  const int Result = Arg.exponent - 1 - LeadingZeroHexits;
+  return Result;
+}
+
+/// Returns \p X multiplied by 16^Exp. Overflow clamps to the largest
+/// magnitude; underflow denormalizes the significand or flushes it to zero.
+/// \p RoundingMode is not consulted.
+// NOTE(review): low_exponent is left untouched here, whereas frexp resets it
+// for HexFP128 --- confirm that this is intended.
+HexFloat scalbn(HexFloat X, int Exp, roundingMode RoundingMode) {
+  // Multiplying by 16^0, or scaling a zero, leaves the value unchanged.
+  if (X.isZero() || Exp == 0)
+    return X;
+
+  // Normalize the significand in place and fold both the normalization
+  // adjustment and the requested scale into one interim exponent.
+  const int LeadingZeroHexits = X.significand.countLeadingZeros() / 4;
+  if (LeadingZeroHexits != 0)
+    X.significand <<= (4 * LeadingZeroHexits);
+  const int Scaled = X.exponent - LeadingZeroHexits + Exp;
+
+  if (Scaled > 63) {
+    // Overflow --- clamp to the maximum value.
+    X.exponent = 63;
+    X.significand.setAllBits();
+    return X;
+  }
+
+  if (Scaled >= -64) {
+    // Ordinary case.
+    X.exponent = Scaled;
+    return X;
+  }
+
+  // Underflow: pin the exponent at -64 and shift the significand right by
+  // one hexit per missing exponent step. Example: Scaled == -66 needs a
+  // shift of two hexits.
+  X.exponent = -64;
+  const int HexitCount = X.significand.getBitWidth() / 4;
+  const int HexitsToShift = -64 - Scaled;
+  if (HexitsToShift > HexitCount) {
+    // Too small to denormalize.
+    X.significand.clearAllBits();
+  } else {
+    X.significand = X.significand.lshr(HexitsToShift * 4);
+  }
+  return X;
+}
+
+/// Splits \p X into a fraction and a power of two. On return \p Exp holds the
+/// base-2 exponent and the result has exponent 0 with the top significand bit
+/// set. A zero input yields a zero with \p Exp set to 0. \p RM is not
+/// consulted.
+HexFloat frexp(HexFloat X, int &Exp, roundingMode RM) {
+  // Treat any all-zero significand as zero, even when the exponent is not
+  // -64; otherwise the bit-normalization loop below would never terminate.
+  if (X.isZero() || X.significand.isZero()) {
+    Exp = 0;
+    X.makeZero(X.isNegative());
+    return X;
+  }
+  Exp = X.exponent;
+  // Normalize to the hexit: drop leading zero hexits from the significand.
+  int NumLeadingZeroHexits = X.significand.countLeadingZeros() / 4;
+  if (NumLeadingZeroHexits > 0) {
+    X.significand <<= (NumLeadingZeroHexits * 4);
+    Exp -= NumLeadingZeroHexits;
+  }
+  X.exponent = 0;
+  if (X.semantics == &APFloat::HexFP128())
+    X.low_exponent = -14;
+  // Convert the exponent from base 16 to base 2.
+  Exp *= 4;
+  // Ensure that 1/2 <= |significand| < 1, i.e. that the top bit is set.
+  // Note that we are now working with an exponent of base 2.
+  while (!X.significand.isSignBitSet()) {
+    X.significand <<= 1;
+    Exp--;
+  }
+  return X;
+}
+
+/// Writes the sign, low sign, exponent, low exponent, significand width and
+/// the significand in hexadecimal to the debug stream.
+void HexFloat::dump() const {
+  SmallString<32> HexDigits;
+  significand.toStringUnsigned(HexDigits, 16);
+  const char *SignText = isNegative() ? "1" : "0";
+  raw_ostream &OS = dbgs();
+  OS << "(" << SignText << "/" << low_sign << ", ";
+  OS << exponent << "/" << low_exponent << ", (";
+  OS << significand.getBitWidth() << ", " << HexDigits << ")";
+  OS << ")\n";
+}
+
+} // namespace detail
+
+/// Builds the storage from an IEEEFloat. With an IEEE layout \p F is moved in
+/// directly; with a DoubleAPFloat layout \p F becomes the first component and
+/// the second is a default-constructed IEEEdouble value.
+APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
+  if (usesLayout<IEEEFloat>(Semantics)) {
+    new (&IEEE) IEEEFloat(std::move(F));
+  } else if (usesLayout<DoubleAPFloat>(Semantics)) {
+    // Read the semantics of F before F is moved from.
+    const fltSemantics &FirstSemantics = F.getSemantics();
+    new (&Double) DoubleAPFloat(Semantics,
+                                APFloat(std::move(F), FirstSemantics),
+                                APFloat(APFloatBase::IEEEdouble()));
+  } else {
+    llvm_unreachable("Unexpected semantics");
+  }
+}
+
+Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
+ roundingMode RM) {
+ APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM)); // hands Str and RM to convertFromString on whichever layout member the dispatch macro selects for the current semantics
+}
+
+/// Hashes \p Arg by delegating to the hash of whichever layout its semantics
+/// selects.
+hash_code hash_value(const APFloat &Arg) {
+  const fltSemantics &Sem = Arg.getSemantics();
+  if (APFloat::usesLayout<detail::IEEEFloat>(Sem))
+    return hash_value(Arg.U.IEEE);
+  if (APFloat::usesLayout<detail::DoubleAPFloat>(Sem))
+    return hash_value(Arg.U.Double);
+  // HexFloat exposes its hash as a member function.
+  if (APFloat::usesLayout<detail::HexFloat>(Sem))
+    return Arg.U.Hex.hash_value();
+  llvm_unreachable("Unexpected semantics");
+}
+
+/// Constructs a value of the given semantics by parsing \p S with
+/// round-to-nearest-even. A parse failure trips the assertion; any error is
+/// consumed afterwards.
+APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
+    : APFloat(Semantics) {
+  auto Parsed = convertFromString(S, rmNearestTiesToEven);
+  assert(Parsed && "Invalid floating point representation");
+  consumeError(Parsed.takeError());
+}
+
+/// Maps the value to its FPClassTest category. Zero, normal, subnormal and
+/// infinity are tested in that order; anything else must be a NaN.
+FPClassTest APFloat::classify() const {
+  if (isZero()) {
+    if (isNegative())
+      return fcNegZero;
+    return fcPosZero;
+  }
+  if (isNormal()) {
+    if (isNegative())
+      return fcNegNormal;
+    return fcPosNormal;
+  }
+  if (isDenormal()) {
+    if (isNegative())
+      return fcNegSubnormal;
+    return fcPosSubnormal;
+  }
+  if (isInfinity()) {
+    if (isNegative())
+      return fcNegInf;
+    return fcPosInf;
+  }
+  assert(isNaN() && "Other class of FP constant");
+  if (isSignaling())
+    return fcSNan;
+  return fcQNan;
+}
+
+/// Returns true if the value has an exact, normal reciprocal in its own
+/// semantics. When it does and \p Inv is non-null, the reciprocal is stored
+/// in \p *Inv. The radix of the semantics must be a power of two.
+bool APFloat::getExactInverse(APFloat *Inv) const {
+  // Only finite, non-zero numbers can have a useful, representable inverse.
+  // This check filters out +/- zero, +/- infinity, and NaN.
+  if (!isFiniteNonZero())
+    return false;
+
+  // Historically, this function rejects subnormal inputs. One reason why this
+  // might be important is that subnormals may behave differently under FTZ/DAZ
+  // runtime behavior.
+  if (isDenormal())
+    return false;
+
+  // A number has an exact, representable inverse if and only if it is a power
+  // of two.
+  //
+  // Mathematical Rationale:
+  // 1. A binary floating-point number x is a dyadic rational, meaning it can
+  //    be written as x = M / 2^k for integers M (the significand) and k.
+  // 2. The inverse is 1/x = 2^k / M.
+  // 3. For 1/x to also be a dyadic rational (and thus exactly representable
+  //    in binary), its denominator M must also be a power of two.
+  //    Let's say M = 2^m.
+  // 4. Substituting this back into the formula for x, we get
+  //    x = (2^m) / (2^k) = 2^(m-k).
+  //
+  // This proves that x must be a power of two.
+
+  // The following will work iff the radix of the representation is a power
+  // of 2. A power of 2 has exactly one bit set.
+  const unsigned int R = getSemantics().radix;
+  assert(llvm::has_single_bit(R) && "radix is not a power of 2");
+
+  // getExactLog2Abs() returns the integer exponent if the number is a power of
+  // two or INT_MIN if it is not.
+  const int Exp = getExactLog2Abs();
+  if (Exp == INT_MIN)
+    return false;
+
+  // Let R be the radix, a power of 2, and let r = lg2(R). Then
+  //   2^Exp = R^pR * 2^p2   with   Exp == pR * r + p2.
+  // R^(-pR) is computed with scalbn and 2^(-p2) with a left shift.
+
+  APFloat Reciprocal(getSemantics());
+  if (R == 2) {
+    // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
+    // scaling 1.0 by the negated exponent.
+    Reciprocal =
+        scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
+               rmTowardZero);
+  } else {
+    // General case.
+    // The exact inverse of 2^Exp is 2^-Exp.
+    // Let Exp = A*r + B where 0 <= |B| < r, then
+    //   2^(A*r + B) == 2^(A*r) * 2^B == R^A * 2^B
+    // and
+    //   2^-(A*r + B) == 2^-(A*r) * 2^-B == R^-A * 2^-B.
+    // We use a left shift to exponentiate the 2, so the shift amount -B must
+    // be >= 0; the code below therefore arranges for p2 (i.e. B) to be <= 0.
+    //
+    // r is deliberately signed: with an unsigned r the divisions and the
+    // negations below would be carried out in unsigned arithmetic and rely on
+    // wraparound to produce negative results.
+    const int r = llvm::countr_zero(R);
+
+    // Signed division truncates toward zero, so for Exp < 0 the remainder is
+    // already <= 0. For Exp >= 0 a positive remainder is moved into the
+    // quotient to make it non-positive.
+    int pR = Exp / r;
+    int p2 = Exp % r;
+    if (p2 > 0) {
+      p2 -= r;
+      ++pR;
+    }
+
+    APInt PowerOf2AsAPInt(32, 1 << -p2);
+    APFloat PowerOf2AsAPFloat(getSemantics());
+    auto convertStatus = PowerOf2AsAPFloat.convertFromAPInt(
+        PowerOf2AsAPInt, /* isSigned */ false, rmTowardZero);
+    assert(convertStatus == opOK &&
+           "error converting PowerOf2AsAPint to an APFloat");
+    // Only read by the assertion above; silences the unused-variable warning
+    // in builds where assertions are compiled out.
+    (void)convertStatus;
+    if (isNegative() != PowerOf2AsAPFloat.isNegative())
+      PowerOf2AsAPFloat.changeSign();
+    assert(isNegative() == PowerOf2AsAPFloat.isNegative());
+    Reciprocal = scalbn(PowerOf2AsAPFloat, -pR, rmTowardZero);
+  }
+
+  // scalbn might round if the resulting exponent -Exp is outside the
+  // representable range, causing overflow (to infinity) or underflow. We
+  // must verify that the result is still the exact power of two we expect.
+  if (Reciprocal.getExactLog2Abs() != -Exp)
+    return false;
+
+  // Avoid multiplication with a subnormal, it is not safe on all platforms and
+  // may be slower than a normal division.
+  if (Reciprocal.isDenormal())
+    return false;
+
+  assert(Reciprocal.isFiniteNonZero());
+
+  if (Inv)
+    *Inv = std::move(Reciprocal);
+
+  return true;
+}
+
+/// Converts this value in place to \p ToSemantics. Supported layout pairs are
+/// IEEE to IEEE, IEEE to DoubleAPFloat, DoubleAPFloat to IEEE, and HexFloat
+/// to HexFloat; any other pair is treated as unreachable.
+APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
+                                   roundingMode RM, bool *losesInfo) {
+  // Converting to the semantics already in use changes nothing.
+  if (&getSemantics() == &ToSemantics) {
+    *losesInfo = false;
+    return opOK;
+  }
+
+  const bool FromIEEE = usesLayout<IEEEFloat>(getSemantics());
+  const bool FromDouble = usesLayout<DoubleAPFloat>(getSemantics());
+  const bool FromHex = usesLayout<HexFloat>(getSemantics());
+  const bool ToIEEE = usesLayout<IEEEFloat>(ToSemantics);
+  const bool ToDouble = usesLayout<DoubleAPFloat>(ToSemantics);
+  const bool ToHex = usesLayout<HexFloat>(ToSemantics);
+
+  if (FromIEEE && ToIEEE)
+    return U.IEEE.convert(ToSemantics, RM, losesInfo);
+
+  if (FromIEEE && ToDouble) {
+    assert(&ToSemantics == &APFloatBase::semPPCDoubleDouble);
+    // Convert to the legacy double-double semantics first, then rebuild the
+    // value from the resulting bit pattern.
+    auto Status =
+        U.IEEE.convert(APFloatBase::semPPCDoubleDoubleLegacy, RM, losesInfo);
+    *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
+    return Status;
+  }
+
+  if (FromDouble && ToIEEE) {
+    auto Status = getIEEE().convert(ToSemantics, RM, losesInfo);
+    *this = APFloat(std::move(getIEEE()), ToSemantics);
+    return Status;
+  }
+
+  if (FromHex && ToHex)
+    return U.Hex.convert(ToSemantics, RM, losesInfo);
+
+  llvm_unreachable("Unexpected semantics");
+}
+
+/// Returns the value whose bit pattern, sizeInBits wide, has every bit set.
+APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
+  const APInt AllOnes = APInt::getAllOnes(Semantics.sizeInBits);
+  return APFloat(Semantics, AllOnes);
+}
+
+/// Writes the toString() rendering of the value to \p OS.
+void APFloat::print(raw_ostream &OS) const {
+  SmallVector<char, 16> Text;
+  toString(Text);
+  OS << Text;
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// Prints the value, followed by a newline, to the debug stream.
+LLVM_DUMP_METHOD void APFloat::dump() const {
+  raw_ostream &DebugOS = dbgs();
+  print(DebugOS);
+  DebugOS << '\n';
+}
+#endif
+
+/// Adds the bit pattern of the value to the folding-set ID.
+void APFloat::Profile(FoldingSetNodeID &NID) const {
+  const APInt Bits = bitcastToAPInt();
+  NID.Add(Bits);
+}
+
+/// Converts the value to an integer with the width and signedness of
+/// \p result, stores it in \p result, and returns the conversion status.
+APFloat::opStatus APFloat::convertToInteger(APSInt &result,
+                                            roundingMode rounding_mode,
+                                            bool *isExact) const {
+  const unsigned Width = result.getBitWidth();
+  SmallVector<uint64_t, 4> Words(result.getNumWords());
+  const opStatus Status = convertToInteger(Words, Width, result.isSigned(),
+                                           rounding_mode, isExact);
+  // Assigning an APInt keeps the original signed-ness of result.
+  result = APInt(Width, Words);
+  return Status;
+}
+
+/// Returns the value as a host double. Values not already in IEEEdouble
+/// semantics are converted first; the conversion is asserted to be exact.
+double APFloat::convertToDouble() const {
+  if (&getSemantics() !=
+      (const llvm::fltSemantics *)&APFloatBase::semIEEEdouble) {
+    assert(isRepresentableBy(getSemantics(), semIEEEdouble) &&
+           "Float semantics is not representable by IEEEdouble");
+    APFloat Converted = *this;
+    bool LosesInfo;
+    opStatus Status = Converted.convert(APFloatBase::semIEEEdouble,
+                                        rmNearestTiesToEven, &LosesInfo);
+    assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
+    (void)Status;
+    return Converted.getIEEE().convertToDouble();
+  }
+  // Already IEEEdouble: no conversion needed.
+  return getIEEE().convertToDouble();
+}
+
+#ifdef HAS_IEE754_FLOAT128
+/// Returns the value as a host float128. Values not already in IEEEquad
+/// semantics are converted first; the conversion is asserted to be exact.
+float128 APFloat::convertToQuad() const {
+  if (&getSemantics() !=
+      (const llvm::fltSemantics *)&APFloatBase::semIEEEquad) {
+    assert(isRepresentableBy(getSemantics(), semIEEEquad) &&
+           "Float semantics is not representable by IEEEquad");
+    APFloat Converted = *this;
+    bool LosesInfo;
+    opStatus Status = Converted.convert(APFloatBase::semIEEEquad,
+                                        rmNearestTiesToEven, &LosesInfo);
+    assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
+    (void)Status;
+    return Converted.getIEEE().convertToQuad();
+  }
+  // Already IEEEquad: no conversion needed.
+  return getIEEE().convertToQuad();
+}
+#endif
+
+/// Returns the value as a host float. Values not already in IEEEsingle
+/// semantics are converted first; the conversion is asserted to be exact.
+float APFloat::convertToFloat() const {
+  if (&getSemantics() !=
+      (const llvm::fltSemantics *)&APFloatBase::semIEEEsingle) {
+    assert(isRepresentableBy(getSemantics(), semIEEEsingle) &&
+           "Float semantics is not representable by IEEEsingle");
+    APFloat Converted = *this;
+    bool LosesInfo;
+    opStatus Status = Converted.convert(APFloatBase::semIEEEsingle,
+                                        rmNearestTiesToEven, &LosesInfo);
+    assert(!(Status & opInexact) && !LosesInfo && "Unexpected imprecision");
+    (void)Status;
+    return Converted.getIEEE().convertToFloat();
+  }
+  // Already IEEEsingle: no conversion needed.
+  return getIEEE().convertToFloat();
+}
+
+/// Returns true if \p Format is one of the arbitrary floating-point format
+/// names listed below.
+bool APFloatBase::isValidArbitraryFPFormat(StringRef Format) {
+  static constexpr StringLiteral KnownFormats[] = {
+      "Float8E5M2",     "Float8E5M2FNUZ",    "Float8E4M3", "Float8E4M3FN",
+      "Float8E4M3FNUZ", "Float8E4M3B11FNUZ", "Float8E3M4", "Float8E8M0FNU",
+      "Float6E3M2FN",   "Float6E2M3FN",      "Float4E2M1FN"};
+  for (const StringLiteral &Known : KnownFormats)
+    if (Known == Format)
+      return true;
+  return false;
+}
+
+/// Destroys whichever union member the stored semantics says is active.
+APFloat::Storage::~Storage() {
+  const fltSemantics &Sem = *semantics;
+  if (usesLayout<IEEEFloat>(Sem))
+    IEEE.~IEEEFloat();
+  else if (usesLayout<DoubleAPFloat>(Sem))
+    Double.~DoubleAPFloat();
+  else if (usesLayout<HexFloat>(Sem))
+    Hex.~HexFloat();
+  else
+    llvm_unreachable("Unexpected semantics");
+}
+
+/// Copy-constructs the union member selected by the semantics of \p RHS.
+APFloat::Storage::Storage(const APFloat::Storage &RHS) {
+  const fltSemantics &Sem = *RHS.semantics;
+  if (usesLayout<IEEEFloat>(Sem))
+    new (this) IEEEFloat(RHS.IEEE);
+  else if (usesLayout<DoubleAPFloat>(Sem))
+    new (this) DoubleAPFloat(RHS.Double);
+  else if (usesLayout<HexFloat>(Sem))
+    new (this) HexFloat(RHS.Hex);
+  else
+    llvm_unreachable("Unexpected semantics");
}
APFloat::Storage::Storage(APFloat::Storage &&RHS) {
@@ -6118,6 +8234,10 @@ APFloat::Storage::Storage(APFloat::Storage &&RHS) {
new (this) DoubleAPFloat(std::move(RHS.Double));
return;
}
+ if (usesLayout<HexFloat>(*RHS.semantics)) {
+ new (this) HexFloat(std::move(RHS.Hex));
+ return;
+ }
llvm_unreachable("Unexpected semantics");
}
@@ -6128,6 +8248,9 @@ APFloat::Storage &APFloat::Storage::operator=(const APFloat::Storage &RHS) {
} else if (usesLayout<DoubleAPFloat>(*semantics) &&
usesLayout<DoubleAPFloat>(*RHS.semantics)) {
Double = RHS.Double;
+ } else if (usesLayout<HexFloat>(*semantics) &&
+ usesLayout<HexFloat>(*RHS.semantics)) {
+ Hex = RHS.Hex;
} else if (this != &RHS) {
this->~Storage();
new (this) Storage(RHS);
@@ -6142,6 +8265,9 @@ APFloat::Storage &APFloat::Storage::operator=(APFloat::Storage &&RHS) {
} else if (usesLayout<DoubleAPFloat>(*semantics) &&
usesLayout<DoubleAPFloat>(*RHS.semantics)) {
Double = std::move(RHS.Double);
+ } else if (usesLayout<HexFloat>(*semantics) &&
+ usesLayout<HexFloat>(*RHS.semantics)) {
+ Hex = std::move(RHS.Hex);
} else if (this != &RHS) {
this->~Storage();
new (this) Storage(std::move(RHS));
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 8ff3efe64c29b..f76251c88a8fa 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -82,6 +82,9 @@ class IEEEFloatUnitTestHelper {
namespace {
+const fltSemantics *HexFloatSemantics[] = { // the three HexFloat semantics that the HexFloat tests in this file loop over
+ &APFloat::HexFP32(), &APFloat::HexFP64(), &APFloat::HexFP128()};
+
TEST(APFloatTest, isSignaling) {
// We test qNaN, -qNaN, +sNaN, -sNaN with and without payloads. *NOTE* The
// positive/negative distinction is included only since the getQNaN/getSNaN
@@ -557,7 +560,7 @@ TEST(APFloatTest, FMA) {
{
APFloat f1(0.0);
APFloat f2(-0.0);
- APFloat f3(-0.0);
+ APFloat f3(f2);
f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
EXPECT_TRUE(f1.isNegative() && f1.isZero());
}
@@ -700,6 +703,89 @@ TEST(APFloatTest, FMA) {
}
}
+// Exercises fusedMultiplyAdd for every HexFloat semantics.
+TEST(APFloatTest, FMAHexFloat) {
+  APFloat::roundingMode rdmd = APFloat::rmNearestTiesToEven;
+
+  // Basic case: 14.5 * -14.5 + 225.0 == 14.75.
+  for (const auto *S : HexFloatSemantics) {
+    auto SemanticsName = APFloat::semanticsName(*S);
+    APFloat f1(*S, "14.5");
+    APFloat f2(*S, "-14.5");
+    APFloat f3(*S, "225.0");
+    f1.fusedMultiplyAdd(f2, f3, rdmd);
+    EXPECT_EQ(APFloat(*S, "14.75"), f1) << " Semantics: " << SemanticsName;
+  }
+
+  // The square of half the smallest normalized value, added to 12.0, is
+  // expected to leave 12.0 unchanged.
+  for (const auto *S : HexFloatSemantics) {
+    auto SemanticsName = APFloat::semanticsName(*S);
+    APFloat Val2(*S, "2.0");
+    APFloat f1(APFloat::getSmallestNormalized(*S));
+    APFloat f2(f1);
+    f1.divide(Val2, rdmd);
+    f2.divide(Val2, rdmd);
+    APFloat f3(*S, "12.0");
+    f1.fusedMultiplyAdd(f2, f3, rdmd);
+    EXPECT_EQ(f3, f1) << " Semantics: " << SemanticsName;
+  }
+
+  // Test for correct zero sign when answer is exactly zero.
+  // fma(1.0, -1.0, 1.0) -> +ve 0.
+  for (const auto *S : HexFloatSemantics) {
+    auto SemanticsName = APFloat::semanticsName(*S);
+    APFloat f1(APFloat::getOne(*S, false));
+    APFloat f2(APFloat::getOne(*S, true));
+    APFloat f3(f1);
+    f1.fusedMultiplyAdd(f2, f3, rdmd);
+    EXPECT_TRUE(!f1.isNegative()) << " Semantics: " << SemanticsName;
+    EXPECT_TRUE(f1.isZero()) << " Semantics: " << SemanticsName;
+  }
+
+  // If result is zero, FMA always returns +ve zero.
+  // Test for correct zero sign when answer is exactly zero and rounding
+  // towards negative. This case must actually pass rmTowardNegative;
+  // otherwise it only repeats the previous case.
+  // fma(1.0, -1.0, 1.0) -> +ve 0.
+  for (const auto *S : HexFloatSemantics) {
+    auto SemanticsName = APFloat::semanticsName(*S);
+    APFloat f1(APFloat::getOne(*S, false));
+    APFloat f2(APFloat::getOne(*S, true));
+    APFloat f3(f1);
+    f1.fusedMultiplyAdd(f2, f3, APFloat::rmTowardNegative);
+    EXPECT_TRUE(!f1.isNegative()) << " Semantics: " << SemanticsName;
+    EXPECT_TRUE(f1.isZero()) << " Semantics: " << SemanticsName;
+  }
+
+  // Test the sign of the result when adding like signed zeros. Unlike the
+  // IEEE case the expected result here is +ve zero.
+  // Test fma(0.0, -0.0, -0.0) -> +ve 0.
+  for (const auto *S : HexFloatSemantics) {
+    auto SemanticsName = APFloat::semanticsName(*S);
+    APFloat f1(APFloat::getZero(*S, false));
+    APFloat f2(APFloat::getZero(*S, true));
+    APFloat f3(f2);
+    f1.fusedMultiplyAdd(f2, f3, rdmd);
+    EXPECT_TRUE(!f1.isNegative()) << " Semantics: " << SemanticsName;
+    EXPECT_TRUE(f1.isZero()) << " Semantics: " << SemanticsName;
+  }
+
+  // Test when small negative results underflow: the result is expected to be
+  // +ve zero.
+  for (const auto *S : HexFloatSemantics) {
+    auto SemanticsName = APFloat::semanticsName(*S);
+    APFloat f1(APFloat::getSmallest(*S, true));
+    APFloat f2(APFloat::getSmallest(*S, false));
+    APFloat f3(APFloat::getZero(*S, false));
+    f1.fusedMultiplyAdd(f2, f3, rdmd);
+    EXPECT_TRUE(!f1.isNegative()) << " Semantics: " << SemanticsName;
+    EXPECT_TRUE(f1.isZero()) << " Semantics: " << SemanticsName;
+  }
+
+  // Test using only a single instance of APFloat: 1.5 * 1.5 + 1.5 == 3.75.
+  for (const auto *S : HexFloatSemantics) {
+    auto SemanticsName = APFloat::semanticsName(*S);
+    APFloat F(*S, "1.5");
+
+    F.fusedMultiplyAdd(F, F, rdmd);
+    EXPECT_EQ(APFloat(*S, "3.75"), F) << " Semantics: " << SemanticsName;
+  }
+}
+
TEST(APFloatTest, MinNum) {
APFloat f1(1.0);
APFloat f2(2.0);
@@ -811,6 +897,29 @@ TEST(APFloatTest, MaxNum) {
}
}
+// Checks minnum/maxnum on 1 and 2, and on the two signed zeros, for every
+// HexFloat semantics, with the operands given in both orders.
+TEST(APFloatTest, MinMaxNumHexFloat) {
+  for (const auto *Sem : HexFloatSemantics) {
+    auto Name = APFloat::semanticsName(*Sem);
+    APFloat Smaller(APFloat::getOne(*Sem, false));
+    APFloat Larger(*Sem, "2.0");
+
+    // Ordinary values.
+    EXPECT_EQ(Smaller, minnum(Smaller, Larger)) << " Semantics: " << Name;
+    EXPECT_EQ(Smaller, minnum(Larger, Smaller)) << " Semantics: " << Name;
+
+    EXPECT_EQ(Larger, maxnum(Smaller, Larger)) << " Semantics: " << Name;
+    EXPECT_EQ(Larger, maxnum(Larger, Smaller)) << " Semantics: " << Name;
+
+    // Signed zeros: the negative zero is the minimum and the positive zero
+    // the maximum.
+    APFloat PosZero(APFloat::getZero(*Sem, false));
+    APFloat NegZero(APFloat::getZero(*Sem, true));
+
+    EXPECT_EQ(NegZero, minnum(PosZero, NegZero)) << " Semantics: " << Name;
+    EXPECT_EQ(NegZero, minnum(NegZero, PosZero)) << " Semantics: " << Name;
+
+    EXPECT_EQ(PosZero, maxnum(PosZero, NegZero)) << " Semantics: " << Name;
+    EXPECT_EQ(PosZero, maxnum(NegZero, PosZero)) << " Semantics: " << Name;
+  }
+}
+
TEST(APFloatTest, Minimum) {
APFloat f1(1.0);
APFloat f2(2.0);
@@ -1027,6 +1136,83 @@ TEST(APFloatTest, Denormal) {
EXPECT_TRUE(NegT.isDenormal());
EXPECT_EQ(fcNegSubnormal, NegT.classify());
}
+
+ // Test HexFloat
+ {
+ bool HasError;
+ const char *MinDenorm32 = "0x0.000001p-256";
+ APFloat HFP32(APFloat::HexFP32());
+
+ HasError = !HFP32.convertFromString(MinDenorm32, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP32.isDenormal());
+ EXPECT_FALSE(HFP32.isNormal());
+ EXPECT_TRUE(HFP32.isNonZero());
+ EXPECT_TRUE(HFP32.isSmallest());
+ HFP32.next(true);
+ EXPECT_TRUE(HFP32.isZero());
+
+ const char *MinNorm32 = "0x0.100000p-256";
+ HasError = !HFP32.convertFromString(MinNorm32, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_FALSE(HFP32.isDenormal());
+ EXPECT_TRUE(HFP32.isNormal());
+ EXPECT_TRUE(HFP32.isNonZero());
+ EXPECT_FALSE(HFP32.isSmallest());
+ HFP32.next(true);
+ EXPECT_TRUE(HFP32.isDenormal());
+ EXPECT_FALSE(HFP32.isNormal());
+ EXPECT_TRUE(HFP32.isNonZero());
+ EXPECT_FALSE(HFP32.isSmallest());
+
+ const char *MinDenorm64 = "0x0.00000000000001p-256";
+ APFloat HFP64(APFloat::HexFP64());
+ HasError = !HFP64.convertFromString(MinDenorm64, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP64.isDenormal());
+ EXPECT_FALSE(HFP64.isNormal());
+ EXPECT_TRUE(HFP64.isNonZero());
+ EXPECT_TRUE(HFP64.isSmallest());
+ HFP64.next(true);
+ EXPECT_TRUE(HFP64.isZero());
+
+ const char *MinNorm64 = "0x0.10000000000000p-256";
+ HasError = !HFP64.convertFromString(MinNorm64, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_FALSE(HFP64.isDenormal());
+ EXPECT_TRUE(HFP64.isNormal());
+ EXPECT_TRUE(HFP64.isNonZero());
+ EXPECT_FALSE(HFP64.isSmallest());
+ HFP64.next(true);
+ EXPECT_TRUE(HFP64.isDenormal());
+ EXPECT_FALSE(HFP64.isNormal());
+ EXPECT_TRUE(HFP64.isNonZero());
+ EXPECT_FALSE(HFP64.isSmallest());
+
+ const char *MinDenorm128 = "0x0.0000000000000000000000000001p-256";
+ APFloat HFP128(APFloat::HexFP128());
+ HasError = !HFP128.convertFromString(MinDenorm128, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP128.isDenormal());
+ EXPECT_FALSE(HFP128.isNormal());
+ EXPECT_TRUE(HFP128.isNonZero());
+ EXPECT_TRUE(HFP128.isSmallest());
+ HFP128.next(true);
+ EXPECT_TRUE(HFP128.isZero());
+
+ const char *MinNorm128 = "0x0.1000000000000000000000000000p-256";
+ HasError = !HFP128.convertFromString(MinNorm128, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_FALSE(HFP128.isDenormal());
+ EXPECT_TRUE(HFP128.isNormal());
+ EXPECT_TRUE(HFP128.isNonZero());
+ EXPECT_FALSE(HFP128.isSmallest());
+ HFP128.next(true);
+ EXPECT_TRUE(HFP128.isDenormal());
+ EXPECT_FALSE(HFP128.isNormal());
+ EXPECT_TRUE(HFP128.isNonZero());
+ EXPECT_FALSE(HFP128.isSmallest());
+ }
}
TEST(APFloatTest, IsSmallestNormalized) {
@@ -1109,6 +1295,17 @@ TEST(APFloatTest, getOne) {
1.0f);
EXPECT_EQ(APFloat::getOne(APFloat::IEEEsingle(), true).convertToFloat(),
-1.0f);
+
+ for (const auto *S : HexFloatSemantics) {
+ APFloat HFP(APFloat::getOne(*S));
+ llvm::SmallVector<char, 100> Buffer;
+ HFP.toString(Buffer);
+ EXPECT_EQ("1", std::string(Buffer.data(), Buffer.size()));
+ HFP = APFloat::getOne(*S, /* Negative */ true);
+ Buffer.clear();
+ HFP.toString(Buffer);
+ EXPECT_EQ("-1", std::string(Buffer.data(), Buffer.size()));
+ }
}
TEST(APFloatTest, DecimalStringsWithoutNullTerminators) {
@@ -1120,6 +1317,26 @@ TEST(APFloatTest, DecimalStringsWithoutNullTerminators) {
EXPECT_EQ(convertToDoubleFromString(StringRef("0.095", 4)), 0.09);
EXPECT_EQ(convertToDoubleFromString(StringRef("0.00e+3", 7)), 0.00);
EXPECT_EQ(convertToDoubleFromString(StringRef("0e+3", 4)), 0.00);
+
+ // HexFloat
+ for (const auto *S : HexFloatSemantics) {
+ APFloat One(APFloat::getOne(*S, false));
+ APFloat F(*S);
+ auto SemanticsName = APFloat::semanticsName(*S);
+
+ auto StatusOrErr =
+ F.convertFromString(StringRef("1.0", 3), APFloat::rmNearestTiesToEven);
+ EXPECT_FALSE(!StatusOrErr);
+ consumeError(StatusOrErr.takeError());
+ EXPECT_EQ(One, F) << " Semantics: " << SemanticsName;
+
+ One.changeSign();
+ StatusOrErr =
+ F.convertFromString(StringRef("-1.0", 4), APFloat::rmNearestTiesToEven);
+ EXPECT_FALSE(!StatusOrErr);
+ consumeError(StatusOrErr.takeError());
+ EXPECT_EQ(One, F) << " Semantics: " << SemanticsName;
+ }
}
TEST(APFloatTest, fromZeroDecimalString) {
@@ -1150,6 +1367,30 @@ TEST(APFloatTest, fromZeroDecimalString) {
EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0000.00000").convertToDouble());
EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0000.00000").convertToDouble());
EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0000.00000").convertToDouble());
+
+ // HexFloat
+ bool HasError;
+ for (const auto *S : HexFloatSemantics) {
+ const APFloat One(APFloat::getOne(*S));
+ APFloat HFP(*S);
+
+ for (const auto *s :
+ {"0", "+0", "-0", "0.", "+0.",
+ "-0.", ".0", "+.0", "-.0", "0.0",
+ "+0.0", "-0.0", "00000.", "-00000.", "+00000.",
+ ".00000", "-.00000", "+.00000", "0000.00000", "-0000.00000",
+ "+0000.00000"}) {
+ const bool ExpectNegative = ('-' == s[0]);
+ // set to something known not to be zero so we know
+ // we are converting the string
+ HFP = One;
+ EXPECT_TRUE(HFP.isNonZero());
+ HasError = !HFP.convertFromString(s, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP.isZero()) << " value is " << s;
+ EXPECT_EQ(ExpectNegative, HFP.isNegative()) << " value is " << s;
+ }
+ }
}
TEST(APFloatTest, fromZeroDecimalSingleExponentString) {
@@ -1207,6 +1448,33 @@ TEST(APFloatTest, fromZeroDecimalSingleExponentString) {
EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "000.0000e1").convertToDouble());
EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+000.0000e+1").convertToDouble());
EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-000.0000e+1").convertToDouble());
+
+  // HexFloat: every spelling below must parse to a zero whose sign matches
+  // the leading '-' (if any).
+  bool HasError;
+  for (const auto *S : HexFloatSemantics) {
+    const APFloat One(APFloat::getOne(*S));
+    APFloat HFP(*S);
+
+    // The ".0e+1" and ".0e-1" rows include their negative forms "-.0e+1" and
+    // "-.0e-1", which were previously replaced by repeats of "-0.e+1" and
+    // "-0.e-1".
+    for (const auto *s :
+         {"0e1",     "+0e1",       "-0e1",         "0e+1",        "+0e+1",
+          "-0e+1",   "0e-1",       "+0e-1",        "-0e-1",       "0.e1",
+          "+0.e1",   "-0.e1",      "0.e+1",        "+0.e+1",      "-0.e+1",
+          "0.e-1",   "+0.e-1",     "-0.e-1",       ".0e1",        "+.0e1",
+          "-.0e1",   ".0e+1",      "+.0e+1",       "-.0e+1",      ".0e-1",
+          "+.0e-1",  "-.0e-1",     "0.0e1",        "+0.0e1",      "-0.0e1",
+          "0.0e+1",  "+0.0e+1",    "-0.0e+1",      "0.0e-1",      "+0.0e-1",
+          "-0.0e-1", "000.0000e1", "+000.0000e+1", "-000.0000e+1"}) {
+      const bool ExpectNegative = ('-' == s[0]);
+      // set to something known not to be zero so we know
+      // we are converting the string
+      HFP = One;
+      EXPECT_TRUE(HFP.isNonZero());
+      HasError = !HFP.convertFromString(s, APFloat::rmTowardZero);
+      EXPECT_FALSE(HasError);
+      EXPECT_TRUE(HFP.isZero()) << " value is " << s;
+      EXPECT_EQ(ExpectNegative, HFP.isNegative()) << " value is " << s;
+    }
+  }
}
TEST(APFloatTest, fromZeroDecimalLargeExponentString) {
@@ -1226,6 +1494,30 @@ TEST(APFloatTest, fromZeroDecimalLargeExponentString) {
EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), "000.0000e-1234").convertToDouble());
EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), StringRef("0e1234" "\0" "2", 6)).convertToDouble());
+
+ // HexFloat
+ bool HasError;
+ for (const auto *S : HexFloatSemantics) {
+ const APFloat One(APFloat::getOne(*S));
+ APFloat HFP(*S);
+
+ using SR = StringRef;
+ for (const StringRef &s :
+ {SR("0e1234"), SR("+0e1234"), SR("-0e1234"), SR("0e+1234"),
+ SR("+0e+1234"), SR("-0e+1234"), SR("0e-1234"), SR("+0e-1234"),
+ SR("-0e-1234"), SR("000.0000e1234"), SR("000.0000e-1234"),
+ SR("0e1234" "\0" "2", 6) }) {
+ const bool ExpectNegative = ('-' == s[0]);
+ // set to something known not to be zero so we know we are
+ // converting the string
+ HFP = One;
+ EXPECT_TRUE(HFP.isNonZero());
+ HasError = !HFP.convertFromString(s, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP.isZero()) << " value is " << s;
+ EXPECT_EQ(ExpectNegative, HFP.isNegative()) << " value is " << s;
+ }
+ }
}
TEST(APFloatTest, fromZeroHexadecimalString) {
@@ -1291,6 +1583,47 @@ TEST(APFloatTest, fromZeroHexadecimalString) {
EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0000.00000p1234").convertToDouble());
EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.00000p1234").convertToDouble());
EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p1234").convertToDouble());
+
+ // HexFloat
+ bool HasError;
+ for (const auto *S : HexFloatSemantics) {
+ const APFloat One(APFloat::getOne(*S));
+ APFloat HFP(*S);
+
+ for (const auto *s : {"0x0p1", "+0x0p1",
+ "-0x0p1", "0x0p+1",
+ "+0x0p+1", "-0x0p+1",
+ "0x0p-1", "+0x0p-1",
+ "-0x0p-1", "0x0.p1",
+ "+0x0.p1", "-0x0.p1",
+ "0x0.p+1", "+0x0.p+1",
+ "-0x0.p+1", "0x0.p-1",
+ "+0x0.p-1", "-0x0.p-1",
+ "0x.0p1", "+0x.0p1",
+ "-0x.0p1", "0x.0p+1",
+ "+0x.0p+1", "-0x.0p+1",
+ "0x.0p-1", "+0x.0p-1",
+ "-0x.0p-1", "0x0.0p1",
+ "+0x0.0p1", "-0x0.0p1",
+ "0x0.0p+1", "+0x0.0p+1",
+ "-0x0.0p+1", "0x0.0p-1",
+ "+0x0.0p-1", "-0x0.0p-1",
+ "0x00000.p1", "0x0000.00000p1",
+ "0x.00000p1", "0x0.p1",
+ "0x0p1234", "-0x0p1234",
+ "0x00000.p1234", "0x0000.00000p1234",
+ "0x.00000p1234", "0x0.p1234"}) {
+ const bool ExpectNegative = ('-' == s[0]);
+ // set to something known not to be zero so we know we
+ // are converting the string
+ HFP = One;
+ EXPECT_TRUE(HFP.isNonZero());
+ HasError = !HFP.convertFromString(s, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP.isZero()) << " value is " << s;
+ EXPECT_EQ(ExpectNegative, HFP.isNegative()) << " value is " << s;
+ }
+ }
}
TEST(APFloatTest, fromDecimalString) {
@@ -1354,6 +1687,63 @@ TEST(APFloatTest, fromDecimalString) {
EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-1e-99999").isNegZero());
EXPECT_EQ(2.71828, convertToDoubleFromString("2.71828"));
+
+ // HexFloat
+ struct test_t {
+ const char *expected;
+ const char *value;
+ };
+ const test_t tests[] = {
+ // clang-format off
+ { "1", "1" }, { "2", "2." }, { "0.5", ".5" },
+ { "1", "1.0" }, { "-2", "-2" }, { "-4", "-4." },
+ { "-0.5", "-.5" }, { "-1.5", "-1.5" }, { "1.25E+12", "1.25e12" },
+ { "1.25E+12", "1.25E+12" }, { "1.25E-12", "1.25E-12" },
+ { "1024", "1024." }, { "1024.05", "1024.05000" },
+ { "0.05", ".05000" }, { "2", "2." },
+ { "200", "2.e2" }, { "200", "2.e+2" }, { "0.02", "2.e-2" },
+ { "205", "002.05000e2" }, { "205", "002.05000e+2" },
+ { "0.0205", "002.05000e-2" },
+ { "2.05E+12", "002.05000e12" },
+ { "2.05E+12", "002.05000e+12" },
+ { "2.05E-12", "002.05000e-12" },
+ { "1", "1e" }, { "1", "+1e" }, { "-1", "-1e" },
+ { "1", "1.e" }, { "1", "+1.e" }, { "-1", "-1.e" },
+ { "0.1", ".1e" }, { "0.1", "+.1e" }, { "-0.1", "-.1e" },
+ { "1.1", "1.1e" }, { "1.1", "+1.1e" }, { "-1.1", "-1.1e" },
+ { "1", "1e+" }, { "1", "1e-" }, { "0.1", ".1e" },
+ { "0.1", ".1e+" }, { "0.1", ".1e-" }, { "1", "1.0e" },
+ { "1", "1.0e+" }, { "1", "1.0e-" }
+ // clang-format on
+ };
+
+ bool HasError;
+ llvm::SmallVector<char, 100> Buffer;
+ for (const auto *S : HexFloatSemantics) {
+ auto SemanticsName = APFloat::semanticsName(*S);
+ const APFloat Zero(APFloat::getZero(*S));
+ APFloat HFP(*S);
+
+ for (const auto &test : tests) {
+ const bool ExpectNegative = ('-' == test.value[0]);
+ // set to zero so we know we are converting the string
+ HFP = Zero;
+ EXPECT_TRUE(HFP.isZero());
+ HasError = !HFP.convertFromString(test.value, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP.isNonZero())
+ << " Semantics: " << SemanticsName << "\n"
+ << " value is " << test.value;
+ EXPECT_EQ(ExpectNegative, HFP.isNegative())
+ << " Semantics: " << SemanticsName << "\n"
+ << " value is " << test.value;
+ Buffer.clear();
+ HFP.toString(Buffer, 6);
+ EXPECT_EQ(test.expected, std::string(Buffer.data(), Buffer.size()))
+ << " Semantics: " << SemanticsName << "\n"
+ << " value is " << test.value;
+ }
+ }
}
TEST(APFloatTest, fromStringSpecials) {
@@ -1571,6 +1961,75 @@ TEST(APFloatTest, fromHexadecimalString) {
convertToDoubleFromString("+0x800000000000000001.p-221"));
EXPECT_EQ(2251799813685248.5,
convertToDoubleFromString("0x80000000000004000000.010p-28"));
+
+ // HexFloat: every hex literal below must parse and print as `expected`
+ struct test_t {
+ const char *expected;
+ const char *value;
+ };
+ const test_t tests[] = {
+ // clang-format off
+ { "1", "0x1p0" }, { "1", "+0x1p0" }, { "-1", "-0x1p0" },
+ { "1", "0x1p+0" }, { "1", "+0x1p+0" }, { "-1", "-0x1p+0" },
+ { "1", "0x1p-0" }, { "1", "+0x1p-0" }, { "-1", "-0x1p-0" },
+ { "2", "0x1p1" }, { "2", "+0x1p1" }, { "-2", "-0x1p1" },
+ { "2", "0x1p+1" }, { "2", "+0x1p+1" }, { "-2", "-0x1p+1" },
+ { "0.5", "0x1p-1" }, { "0.5", "+0x1p-1" }, { "-0.5", "-0x1p-1" },
+ { "3", "0x1.8p1" }, { "3", "+0x1.8p1" }, { "-3", "-0x1.8p1" },
+ { "3", "0x1.8p+1" }, { "3", "+0x1.8p+1" }, { "-3", "-0x1.8p+1" },
+ { "0.75", "0x1.8p-1" }, { "0.75", "+0x1.8p-1" },
+ { "-0.75", "-0x1.8p-1" },
+ { "8192", "0x1000.000p1" }, { "8192", "+0x1000.000p1" },
+ { "-8192", "-0x1000.000p1" },
+ { "8192", "0x1000.000p+1" }, { "8192", "+0x1000.000p+1" },
+ { "-8192", "-0x1000.000p+1" },
+ { "2048", "0x1000.000p-1" }, { "2048", "+0x1000.000p-1" },
+ { "-2048", "-0x1000.000p-1" },
+ { "8192", "0x1000p1" }, { "8192", "+0x1000p1" },
+ { "-8192", "-0x1000p1" },
+ { "8192", "0x1000p+1" }, { "8192", "+0x1000p+1" },
+ { "-8192", "-0x1000p+1" },
+ { "2048", "0x1000p-1" }, { "2048", "+0x1000p-1" },
+ { "-2048", "-0x1000p-1" },
+ { "16384", "0x10p10" }, { "16384", "+0x10p10" },
+ { "-16384", "-0x10p10" },
+ { "16384", "0x10p+10" }, { "16384", "+0x10p+10" },
+ { "-16384", "-0x10p+10" },
+ { "0.015625", "0x10p-10" }, { "0.015625", "+0x10p-10" },
+ { "-0.015625", "-0x10p-10" },
+ { "1.0625", "0x1.1p0" },
+ { "7.00649E-46", "+0x800000000000000001.p-221" },
+ { "2.2518E+15", "0x80000000000004000000.010p-28" }
+ // clang-format on
+ };
+
+ bool HasError;
+ llvm::SmallVector<char, 100> Buffer;
+ for (const auto *S : HexFloatSemantics) {
+ auto SemanticsName = APFloat::semanticsName(*S);
+ const APFloat Zero(APFloat::getZero(*S));
+ APFloat HFP(*S);
+
+ for (const auto &test : tests) {
+ const bool ExpectNegative = ('-' == test.value[0]); // sign comes from the literal's first character
+ // set to zero so we know we are converting the string
+ HFP = Zero;
+ EXPECT_TRUE(HFP.isZero());
+ HasError = !HFP.convertFromString(test.value, APFloat::rmTowardZero);
+ EXPECT_FALSE(HasError);
+ EXPECT_TRUE(HFP.isNonZero())
+ << " Semantics: " << SemanticsName << "\n"
+ << " value is " << test.value;
+ EXPECT_EQ(ExpectNegative, HFP.isNegative())
+ << " Semantics: " << SemanticsName << "\n"
+ << " value is " << test.value;
+ Buffer.clear(); // Buffer is reused across iterations
+ HFP.toString(Buffer, 6);
+ EXPECT_EQ(test.expected, std::string(Buffer.data(), Buffer.size()))
+ << " Semantics: " << SemanticsName << "\n"
+ << " value is " << test.value;
+ }
+ }
}
TEST(APFloatTest, toString) {
@@ -1610,54 +2069,72 @@ TEST(APFloatTest, toString) {
UnnormalZero.toString(Str);
ASSERT_EQ("NaN", Str);
}
+
+ // HexFloat: toString is exercised by the string-conversion tests above
}
TEST(APFloatTest, toInteger) {
bool isExact = false;
APSInt result(5, /*isUnsigned=*/true);
- EXPECT_EQ(APFloat::opOK,
- APFloat(APFloat::IEEEdouble(), "10")
- .convertToInteger(result, APFloat::rmTowardZero, &isExact));
- EXPECT_TRUE(isExact);
- EXPECT_EQ(APSInt(APInt(5, 10), true), result);
-
- EXPECT_EQ(APFloat::opInvalidOp,
- APFloat(APFloat::IEEEdouble(), "-10")
- .convertToInteger(result, APFloat::rmTowardZero, &isExact));
- EXPECT_FALSE(isExact);
- EXPECT_EQ(APSInt::getMinValue(5, true), result);
-
- EXPECT_EQ(APFloat::opInvalidOp,
- APFloat(APFloat::IEEEdouble(), "32")
- .convertToInteger(result, APFloat::rmTowardZero, &isExact));
- EXPECT_FALSE(isExact);
- EXPECT_EQ(APSInt::getMaxValue(5, true), result);
-
- EXPECT_EQ(APFloat::opInexact,
- APFloat(APFloat::IEEEdouble(), "7.9")
- .convertToInteger(result, APFloat::rmTowardZero, &isExact));
- EXPECT_FALSE(isExact);
- EXPECT_EQ(APSInt(APInt(5, 7), true), result);
-
- result.setIsUnsigned(false);
- EXPECT_EQ(APFloat::opOK,
- APFloat(APFloat::IEEEdouble(), "-10")
- .convertToInteger(result, APFloat::rmTowardZero, &isExact));
- EXPECT_TRUE(isExact);
- EXPECT_EQ(APSInt(APInt(5, -10, true), false), result);
-
- EXPECT_EQ(APFloat::opInvalidOp,
- APFloat(APFloat::IEEEdouble(), "-17")
- .convertToInteger(result, APFloat::rmTowardZero, &isExact));
- EXPECT_FALSE(isExact);
- EXPECT_EQ(APSInt::getMinValue(5, false), result);
-
- EXPECT_EQ(APFloat::opInvalidOp,
- APFloat(APFloat::IEEEdouble(), "16")
- .convertToInteger(result, APFloat::rmTowardZero, &isExact));
- EXPECT_FALSE(isExact);
- EXPECT_EQ(APSInt::getMaxValue(5, false), result);
+ for (const auto *S : {&APFloat::IEEEdouble(), &APFloat::HexFP32(),
+ &APFloat::HexFP64(), &APFloat::HexFP128()}) {
+ auto SemanticsName = APFloat::semanticsName(*S);
+ result.setIsUnsigned(true);
+ EXPECT_EQ(APFloat::opOK, APFloat(*S, "10").convertToInteger(
+ result, APFloat::rmTowardZero, &isExact))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(isExact) << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APSInt(APInt(5, 10), true), result) << " Semantics: " << SemanticsName;
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(*S, "-10").convertToInteger(result, APFloat::rmTowardZero,
+ &isExact))
+ << " Semantics: " << SemanticsName;
+ EXPECT_FALSE(isExact) << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APSInt::getMinValue(5, true), result)
+ << " Semantics: " << SemanticsName;
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(*S, "32").convertToInteger(result, APFloat::rmTowardZero,
+ &isExact))
+ << " Semantics: " << SemanticsName;
+ EXPECT_FALSE(isExact) << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APSInt::getMaxValue(5, true), result)
+ << " Semantics: " << SemanticsName;
+
+ EXPECT_EQ(APFloat::opInexact,
+ APFloat(*S, "7.9").convertToInteger(result, APFloat::rmTowardZero,
+ &isExact))
+ << " Semantics: " << SemanticsName;
+ EXPECT_FALSE(isExact) << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APSInt(APInt(5, 7), true), result)
+ << " Semantics: " << SemanticsName;
+
+ result.setIsUnsigned(false);
+ EXPECT_EQ(APFloat::opOK, APFloat(*S, "-10").convertToInteger(
+ result, APFloat::rmTowardZero, &isExact))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(isExact) << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APSInt(APInt(5, -10, true), false), result)
+ << " Semantics: " << SemanticsName;
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(*S, "-17").convertToInteger(result, APFloat::rmTowardZero,
+ &isExact))
+ << " Semantics: " << SemanticsName;
+ EXPECT_FALSE(isExact) << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APSInt::getMinValue(5, false), result)
+ << " Semantics: " << SemanticsName;
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(*S, "16").convertToInteger(result, APFloat::rmTowardZero,
+ &isExact))
+ << " Semantics: " << SemanticsName;
+ EXPECT_FALSE(isExact) << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APSInt::getMaxValue(5, false), result)
+ << " Semantics: " << SemanticsName;
+ }
}
class APFloatConvertFromAPIntParamTest
@@ -1669,6 +2146,7 @@ class APFloatConvertFromAPIntParamTest
APFloat::roundingMode RM,
const APInt &ExpectedIntValue) {
const fltSemantics &Sem = *GetParam();
+ auto SemanticsName = APFloat::semanticsName(Sem);
APFloat F(Sem);
F.convertFromAPInt(InputValue, /*IsSigned=*/IsSigned, RM);
@@ -1676,10 +2154,12 @@ class APFloatConvertFromAPIntParamTest
bool IsExact;
F.convertToInteger(ResultInt, APFloat::rmTowardZero, &IsExact);
- EXPECT_TRUE(IsExact);
+ EXPECT_TRUE(IsExact) << "Semantics: " << SemanticsName;
EXPECT_TRUE(ResultInt.eq(ExpectedIntValue))
<< "InputValue: " << InputValue << "\n"
- << ResultInt << " vs " << ExpectedIntValue << "\n";
+ << ResultInt << " vs " << ExpectedIntValue << "\n"
+ << "Semantics: " << SemanticsName << "\n"
+ << "Rounding mode: " << RM << "\n";
}
};
@@ -1690,12 +2170,15 @@ TEST_P(APFloatConvertFromAPIntParamTest, HalfwayRounding) {
if (Precision == 0)
GTEST_SKIP() << "Skipping test for semantics with no significand.";
+ const unsigned Radix = APFloat::semanticsRadix(Sem);
+ const unsigned Log2Radix = llvm::countr_zero(Radix);
+
for (bool IsSigned : {false, true}) {
- const unsigned BitWidth = Precision + 1 + (IsSigned ? 1 : 0);
+ const unsigned BitWidth = (Precision + 1) * Log2Radix + (IsSigned ? 1 : 0);
- const APInt RoundedDownVal = APInt::getOneBitSet(BitWidth, Precision);
- const APInt HalfwayVal = RoundedDownVal + 1;
- const APInt RoundedUpVal = RoundedDownVal + 2;
+ const APInt RoundedDownVal = APInt::getOneBitSet(BitWidth, Precision * Log2Radix);
+ const APInt HalfwayVal = RoundedDownVal + (Radix / 2);
+ const APInt RoundedUpVal = RoundedDownVal + Radix;
testConversionAndCompareInt(HalfwayVal, IsSigned,
APFloat::rmNearestTiesToEven, RoundedDownVal);
@@ -1713,20 +2196,27 @@ TEST_P(APFloatConvertFromAPIntParamTest, HalfwayRounding) {
TEST_P(APFloatConvertFromAPIntParamTest, MaxMagnitude) {
const fltSemantics &Sem = *GetParam();
const unsigned Precision = APFloat::semanticsPrecision(Sem);
+ auto SemanticsName = APFloat::semanticsName(Sem);
if (Precision == 0)
GTEST_SKIP() << "Skipping test for semantics with no significand.";
+ const unsigned Radix = APFloat::semanticsRadix(Sem);
+ const unsigned Log2Radix = llvm::countr_zero(Radix);
+
const APFloat Largest = APFloat::getLargest(Sem, /*Negative=*/false);
const int Exp = ilogb(Largest);
for (bool IsSigned : {false, true}) {
- const unsigned BitWidth = Exp + 1 + (IsSigned ? 1 : 0);
+ const unsigned BitWidth = (Exp + 1) * Log2Radix + (IsSigned ? 1 : 0);
bool IsExact;
APSInt LargestAsInt{BitWidth, /*IsUnsigned=*/!IsSigned};
const APFloat::opStatus ToIntStatus =
Largest.convertToInteger(LargestAsInt, APFloat::rmTowardZero, &IsExact);
- EXPECT_EQ(ToIntStatus, APFloat::opOK);
+ EXPECT_EQ(ToIntStatus, APFloat::opOK)
+ << "Semantics: " << SemanticsName << "\n"
+ << " isSigned == " << IsSigned
+ << " Exp == " << Exp;
for (const APFloat::roundingMode RM :
{APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
@@ -1742,7 +2232,10 @@ INSTANTIATE_TEST_SUITE_P(IEEESemantics, APFloatConvertFromAPIntParamTest,
&APFloat::BFloat(),
&APFloat::IEEEsingle(),
&APFloat::IEEEdouble(),
- &APFloat::IEEEquad()));
+ &APFloat::IEEEquad(),
+ &APFloat::HexFP32(),
+ &APFloat::HexFP64(),
+ &APFloat::HexFP128()));
static APInt nanbitsFromAPInt(const fltSemantics &Sem, bool SNaN, bool Negative,
uint64_t payload) {
@@ -2012,6 +2505,30 @@ TEST(APFloatTest, exactInverse) {
EXPECT_TRUE(
inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0x1p-1022")));
+ for (const auto *S : HexFloatSemantics) {
+ EXPECT_TRUE(APFloat(*S, "2.0").getExactInverse(&inv));
+ EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(*S, "0.5")));
+ EXPECT_TRUE(inv.getExactInverse(&inv));
+ EXPECT_TRUE(APFloat(*S, "2.0").bitwiseIsEqual(inv));
+
+ EXPECT_TRUE(APFloat(*S, "-2.0").getExactInverse(&inv));
+ EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(*S, "-0.5")));
+ EXPECT_TRUE(inv.getExactInverse(&inv));
+ EXPECT_TRUE(APFloat(*S, "-2.0").bitwiseIsEqual(inv));
+
+ EXPECT_TRUE(APFloat(*S, "32.0").getExactInverse(&inv));
+ EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(*S, "0.03125")));
+ EXPECT_TRUE(inv.getExactInverse(&inv));
+ EXPECT_TRUE(APFloat(*S, "32.0").bitwiseIsEqual(inv));
+
+ EXPECT_TRUE(APFloat(*S, "-32.0").getExactInverse(&inv));
+ EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(*S, "-0.03125")));
+ EXPECT_TRUE(inv.getExactInverse(&inv));
+ EXPECT_TRUE(APFloat(*S, "-32.0").bitwiseIsEqual(inv));
+
+ EXPECT_FALSE(APFloat::getZero(*S).getExactInverse(nullptr));
+ }
+
// FLT_MIN
EXPECT_TRUE(APFloat(1.17549435e-38f).getExactInverse(&inv));
EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(8.5070592e+37f)));
@@ -2201,6 +2718,179 @@ TEST(APFloatTest, roundToIntegral) {
St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
EXPECT_EQ(10.0, P.convertToDouble());
EXPECT_EQ(APFloat::opInexact, St);
+
+ // HexFloat
+ {
+ constexpr bool Positive = false;
+ constexpr bool Negative = !Positive;
+ APFloat::opStatus St;
+ for (const auto *Sem : HexFloatSemantics) {
+ APFloat T(*Sem, "-0.5"), S(*Sem, "3.14");
+ APFloat R(APFloat::getLargest(*Sem)), P(*Sem, "0.0");
+
+ P = T;
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_TRUE(P.isNegZero());
+
+ P = T;
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_EQ(APFloat::getOne(*Sem, Negative), P);
+ P = T;
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_TRUE(P.isNegZero());
+ P = T;
+ St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_TRUE(P.isNegZero());
+
+ P = S;
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_EQ(APFloat(*Sem, "3"), P);
+ P = S;
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_EQ(APFloat(*Sem, "3"), P);
+ P = S;
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_EQ(APFloat(*Sem, "4"), P);
+ P = S;
+ St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(APFloat::opInexact, St);
+ EXPECT_EQ(APFloat(*Sem, "3"), P);
+
+ P = R;
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat::opOK, St);
+ EXPECT_EQ(R, P);
+ P = R;
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_EQ(APFloat::opOK, St);
+ EXPECT_EQ(R, P);
+ P = R;
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(APFloat::opOK, St);
+ EXPECT_EQ(R, P);
+ P = R;
+ St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(APFloat::opOK, St);
+ EXPECT_EQ(R, P);
+
+ P = APFloat::getZero(*Sem, Positive);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat::opOK, St);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_TRUE(P.isPosZero());
+
+ P = APFloat::getZero(*Sem, Negative);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat::opOK, St);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_TRUE(P.isNegZero());
+
+ P = APFloat::getZero(*Sem, Positive);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getSmallest(*Sem, Positive);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat::getSmallest(*Sem, Positive);
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(APFloat::getOne(*Sem, Positive), P);
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat::getSmallest(*Sem, Negative);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::getOne(*Sem, Negative), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat::getSmallest(*Sem, Negative);
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "10.0");
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat(*Sem, "10.0"), P);
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat(*Sem, "10.5");
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat(*Sem, "10.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "10.5");
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(APFloat(*Sem, "11.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "10.5");
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_EQ(APFloat(*Sem, "10.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "10.5");
+ St = P.roundToIntegral(APFloat::rmNearestTiesToAway);
+ EXPECT_EQ(APFloat(*Sem, "11.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "10.5");
+ St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(APFloat(*Sem, "10.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "-10.0");
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat(*Sem, "-10.0"), P);
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat(*Sem, "-10.5");
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(APFloat(*Sem, "-10.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "-10.5");
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(APFloat(*Sem, "-10.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "-10.5");
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_EQ(APFloat(*Sem, "-11.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "-10.5");
+ St = P.roundToIntegral(APFloat::rmNearestTiesToAway);
+ EXPECT_EQ(APFloat(*Sem, "-11.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(*Sem, "-10.5");
+ St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(APFloat(*Sem, "-10.0"), P);
+ EXPECT_EQ(APFloat::opInexact, St);
+ }
+ }
}
TEST(APFloatTest, isInteger) {
@@ -2216,6 +2906,17 @@ TEST(APFloatTest, isInteger) {
EXPECT_FALSE(T.isInteger());
T = APFloat::getLargest(APFloat::IEEEdouble());
EXPECT_TRUE(T.isInteger());
+
+ {
+ for (const auto *Sem : HexFloatSemantics) {
+ APFloat T(*Sem, "-0.0");
+ EXPECT_TRUE(T.isInteger());
+ T = APFloat(*Sem, "3.14159");
+ EXPECT_FALSE(T.isInteger());
+ T = APFloat::getLargest(*Sem);
+ EXPECT_TRUE(T.isInteger());
+ }
+ }
}
TEST(DoubleAPFloatTest, isInteger) {
@@ -2305,6 +3006,16 @@ TEST(APFloatTest, getLargest) {
EXPECT_EQ(7.5,
APFloat::getLargest(APFloat::Float6E2M3FN()).convertToDouble());
EXPECT_EQ(6, APFloat::getLargest(APFloat::Float4E2M1FN()).convertToDouble());
+ EXPECT_EQ(APFloat::getLargest(APFloat::HexFP32()),
+ APFloat(APFloat::HexFP32(), "0x.ffffffp252"));
+ EXPECT_EQ(APFloat::getLargest(APFloat::HexFP64()),
+ APFloat(APFloat::HexFP64(), "0x.ffffff"
+ "ffffffffp252"));
+ EXPECT_EQ(APFloat::getLargest(APFloat::HexFP128()),
+ APFloat(APFloat::HexFP128(), "0x.ffffff"
+ "ffffffff"
+ "ffffff"
+ "ffffffffp252"));
}
TEST(APFloatTest, getSmallest) {
@@ -2391,6 +3102,17 @@ TEST(APFloatTest, getSmallest) {
EXPECT_TRUE(test.isFiniteNonZero());
EXPECT_FALSE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));
+
+ EXPECT_EQ(APFloat::getSmallest(APFloat::HexFP32()),
+ APFloat(APFloat::HexFP32(), "0x.000001p-256"));
+ EXPECT_EQ(APFloat::getSmallest(APFloat::HexFP64()),
+ APFloat(APFloat::HexFP64(), "0x.000000"
+ "00000001p-256"));
+ EXPECT_EQ(APFloat::getSmallest(APFloat::HexFP128()),
+ APFloat(APFloat::HexFP128(), "0x.000000"
+ "00000000"
+ "000000"
+ "00000001p-256"));
}
TEST(APFloatTest, getSmallestNormalized) {
@@ -2505,6 +3227,13 @@ TEST(APFloatTest, getSmallestNormalized) {
EXPECT_FALSE(test.isDenormal());
EXPECT_TRUE(test.bitwiseIsEqual(expected));
EXPECT_TRUE(test.isSmallestNormalized());
+
+ EXPECT_EQ(APFloat::getSmallestNormalized(APFloat::HexFP32()),
+ APFloat(APFloat::HexFP32(), "0x.1p-256"));
+ EXPECT_EQ(APFloat::getSmallestNormalized(APFloat::HexFP64()),
+ APFloat(APFloat::HexFP64(), "0x.1p-256"));
+ EXPECT_EQ(APFloat::getSmallestNormalized(APFloat::HexFP128()),
+ APFloat(APFloat::HexFP128(), "0x.1p-256"));
}
TEST(APFloatTest, getZero) {
@@ -2548,7 +3277,19 @@ TEST(APFloatTest, getZero) {
{&APFloat::Float6E2M3FN(), false, true, {0, 0}, 1},
{&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1},
{&APFloat::Float4E2M1FN(), false, true, {0, 0}, 1},
- {&APFloat::Float4E2M1FN(), true, true, {0x8ULL, 0}, 1}};
+ {&APFloat::Float4E2M1FN(), true, true, {0x8ULL, 0}, 1},
+ {&APFloat::HexFP32(), false, true, {0, 0}, 1},
+ {&APFloat::HexFP32(), true, true, {0x80000000, 0}, 1},
+ {&APFloat::HexFP64(), false, true, {0, 0}, 1},
+ {&APFloat::HexFP64(), true, true, {0x8000000000000000, 0}, 1},
+ {&APFloat::HexFP128(), false, true, {0, 0}, 2},
+ // negative HFP128: the sign bit is the MSB of the 128-bit value;
+ // the raw words are compared least-significant first, so it is in word 1
+ {&APFloat::HexFP128(),
+ true,
+ true,
+ {0x0000000000000000, 0x8000000000000000},
+ 2}};
const unsigned NumGetZeroTests = std::size(GetZeroTest);
for (unsigned i = 0; i < NumGetZeroTests; ++i) {
APFloat test = APFloat::getZero(*GetZeroTest[i].semantics,
@@ -2564,7 +3305,8 @@ TEST(APFloatTest, getZero) {
EXPECT_TRUE(test.bitwiseIsEqual(expected));
for (unsigned j = 0, je = GetZeroTest[i].bitPatternLength; j < je; ++j) {
EXPECT_EQ(GetZeroTest[i].bitPattern[j],
- test.bitcastToAPInt().getRawData()[j]);
+ test.bitcastToAPInt().getRawData()[j])
+ << " i == " << i << " j == " << j;
}
}
}
@@ -2587,6 +3329,21 @@ TEST(APFloatTest, copySign) {
EXPECT_TRUE(APFloat::getNaN(Sem, true).bitwiseIsEqual(
APFloat::copySign(APFloat::getNaN(Sem, true), APFloat(1.0))));
}
+ for (const auto *S : HexFloatSemantics) { // all four sign combinations, per HexFloat semantics
+ const APFloat FortyTwo(*S, "42.0");
+ const APFloat MinusFortyTwo(*S, "-42.0");
+ const APFloat One(APFloat::getOne(*S, /* Negative == */ false));
+ const APFloat MinusOne(APFloat::getOne(*S, /* Negative == */ true));
+
+ EXPECT_TRUE(MinusFortyTwo.bitwiseIsEqual(
+ APFloat::copySign(APFloat(FortyTwo), MinusOne)));
+ EXPECT_TRUE(FortyTwo.bitwiseIsEqual(
+ APFloat::copySign(APFloat(MinusFortyTwo), One)));
+ EXPECT_TRUE(MinusFortyTwo.bitwiseIsEqual(
+ APFloat::copySign(APFloat(MinusFortyTwo), MinusOne)));
+ EXPECT_TRUE(
+ FortyTwo.bitwiseIsEqual(APFloat::copySign(APFloat(FortyTwo), One)));
+ }
}
TEST(APFloatTest, convert) {
@@ -2697,6 +3454,37 @@ TEST(APFloatTest, convert) {
test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);
EXPECT_EQ(0x01, test.bitcastToAPInt());
EXPECT_TRUE(losesInfo);
+
+ // HexFloat
+ test = APFloat(APFloat::HexFP64(), "1.0");
+ test.convert(APFloat::HexFP32(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(APFloat::getOne(APFloat::HexFP32()), test);
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat(APFloat::HexFP128(), "1.0");
+ test.convert(APFloat::HexFP32(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(APFloat::getOne(APFloat::HexFP32()), test);
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat(APFloat::HexFP128(), "1.0");
+ test.convert(APFloat::HexFP64(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(APFloat::getOne(APFloat::HexFP64()), test);
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat::getSmallest(APFloat::HexFP64());
+ test.convert(APFloat::HexFP32(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(losesInfo);
+ EXPECT_EQ(APFloat::getZero(APFloat::HexFP32()), test);
+
+ test = APFloat::getSmallest(APFloat::HexFP128());
+ test.convert(APFloat::HexFP32(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(losesInfo);
+ EXPECT_EQ(APFloat::getZero(APFloat::HexFP32()), test);
+
+ test = APFloat::getSmallest(APFloat::HexFP128());
+ test.convert(APFloat::HexFP64(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_TRUE(losesInfo);
+ EXPECT_EQ(APFloat::getZero(APFloat::HexFP64()), test);
}
TEST(APFloatTest, Float8UZConvert) {
@@ -2850,6 +3638,13 @@ TEST(APFloatTest, isNegative) {
EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNegative());
EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), true).isNegative());
+
+ for (const auto *S : HexFloatSemantics) {
+ APFloat t(*S, "0x1p+0");
+ EXPECT_FALSE(t.isNegative());
+ t = APFloat(*S, "-0x1p+0");
+ EXPECT_TRUE(t.isNegative());
+ }
}
TEST(APFloatTest, isNormal) {
@@ -2858,9 +3653,21 @@ TEST(APFloatTest, isNormal) {
EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isNormal());
EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isNormal());
+ for (const auto *S : HexFloatSemantics) {
+ EXPECT_FALSE(APFloat::getZero(*S, false).isNormal());
+ }
EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isNormal());
EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNormal());
EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isNormal());
+
+ for (const auto *S : HexFloatSemantics) {
+ APFloat F(APFloat::getSmallestNormalized(*S));
+ EXPECT_TRUE(F.isNormal());
+ EXPECT_FALSE(F.isDenormal());
+ F.next(/* down == */ true);
+ EXPECT_FALSE(F.isNormal());
+ EXPECT_TRUE(F.isDenormal());
+ }
}
TEST(APFloatTest, isFinite) {
@@ -2926,10 +3733,28 @@ TEST(APFloatTest, isFiniteNonZero) {
// Test positive/negative normal value.
EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p+0").isFiniteNonZero());
EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p+0").isFiniteNonZero());
+ EXPECT_TRUE(APFloat(APFloat::HexFP32(), "0x0.100000p-256").isFiniteNonZero());
+ EXPECT_TRUE(APFloat(APFloat::HexFP64(), "0x0.100000"
+ "00000000p-256")
+ .isFiniteNonZero());
+ EXPECT_TRUE(APFloat(APFloat::HexFP128(), "0x0.100000"
+ "00000000"
+ "000000"
+ "00000000p-256")
+ .isFiniteNonZero());
// Test positive/negative denormal value.
EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isFiniteNonZero());
EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p-149").isFiniteNonZero());
+ EXPECT_TRUE(APFloat(APFloat::HexFP32(), "0x0.000001p-256").isFiniteNonZero());
+ EXPECT_TRUE(APFloat(APFloat::HexFP64(), "0x0.000000"
+ "00000001p-256")
+ .isFiniteNonZero());
+ EXPECT_TRUE(APFloat(APFloat::HexFP128(), "0x0.000000"
+ "00000000"
+ "000000"
+ "00000001p-256")
+ .isFiniteNonZero());
// Test +/- Infinity.
EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isFiniteNonZero());
@@ -4091,6 +4916,56 @@ TEST(APFloatTest, Comparisons) {
Relations[I][J](Vals[I], Vals[J]);
}
+TEST(APFloatTest, ComparisonsHexFloat) { // checks ==, !=, <, >, <=, >= on HexFloat values
+ enum { MBig, MOne, MZer, PZer, POne, PBig, NumVals }; // indices into Vals; NumVals is the count
+ using Relation = void (*)(const APFloat &, const APFloat &);
+ Relation LT = [](const APFloat &LHS, const APFloat &RHS) { // expects LHS strictly less than RHS
+ EXPECT_FALSE(LHS == RHS);
+ EXPECT_TRUE(LHS != RHS);
+ EXPECT_TRUE(LHS < RHS);
+ EXPECT_FALSE(LHS > RHS);
+ EXPECT_TRUE(LHS <= RHS);
+ EXPECT_FALSE(LHS >= RHS);
+ };
+ Relation EQ = [](const APFloat &LHS, const APFloat &RHS) { // expects LHS equal to RHS
+ EXPECT_TRUE(LHS == RHS);
+ EXPECT_FALSE(LHS != RHS);
+ EXPECT_FALSE(LHS < RHS);
+ EXPECT_FALSE(LHS > RHS);
+ EXPECT_TRUE(LHS <= RHS);
+ EXPECT_TRUE(LHS >= RHS);
+ };
+ Relation GT = [](const APFloat &LHS, const APFloat &RHS) { // expects LHS strictly greater than RHS
+ EXPECT_FALSE(LHS == RHS);
+ EXPECT_TRUE(LHS != RHS);
+ EXPECT_FALSE(LHS < RHS);
+ EXPECT_TRUE(LHS > RHS);
+ EXPECT_FALSE(LHS <= RHS);
+ EXPECT_TRUE(LHS >= RHS);
+ };
+ Relation Relations[NumVals][NumVals] = { // Relations[I][J]: expected ordering of Vals[I] vs Vals[J]
+ // -B -1 -0 +0 +1 +B
+ /* MBig */ {EQ, LT, LT, LT, LT, LT},
+ /* MOne */ {GT, EQ, LT, LT, LT, LT},
+ /* MZer */ {GT, GT, EQ, EQ, LT, LT}, // -0 and +0 are expected to compare equal
+ /* PZer */ {GT, GT, EQ, EQ, LT, LT},
+ /* POne */ {GT, GT, GT, GT, EQ, LT},
+ /* PBig */ {GT, GT, GT, GT, GT, EQ},
+ };
+ for (const auto *S : HexFloatSemantics) { // run the whole table for each HexFloat semantics
+ APFloat Vals[NumVals] = {APFloat::getLargest(*S, /* Negative == */ true),
+ APFloat::getOne(*S, /* Negative == */ true),
+ APFloat::getZero(*S, /* Negative == */ true),
+ APFloat::getZero(*S, /* Negative == */ false),
+ APFloat::getOne(*S, /* Negative == */ false),
+ APFloat::getLargest(*S, /* Negative == */ false)};
+
+ for (unsigned I = 0; I < NumVals; ++I)
+ for (unsigned J = 0; J < NumVals; ++J)
+ Relations[I][J](Vals[I], Vals[J]);
+ }
+}
+
TEST(APFloatTest, abs) {
APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
@@ -4129,6 +5004,32 @@ TEST(APFloatTest, abs) {
EXPECT_TRUE(PSmallestNormalized.bitwiseIsEqual(abs(MSmallestNormalized)));
}
+TEST(APFloatTest, absHexFloat) { // abs() of +x and -x must both be bitwise equal to +x
+ for (const auto *S : HexFloatSemantics) {
+ APFloat PZero = APFloat::getZero(*S, false); // P* = positive, M* = negative of the same value
+ APFloat MZero = APFloat::getZero(*S, true);
+ APFloat PNormalValue = APFloat(*S, "0x1p+0");
+ APFloat MNormalValue = APFloat(*S, "-0x1p+0");
+ APFloat PLargestValue = APFloat::getLargest(*S, false);
+ APFloat MLargestValue = APFloat::getLargest(*S, true);
+ APFloat PSmallestValue = APFloat::getSmallest(*S, false);
+ APFloat MSmallestValue = APFloat::getSmallest(*S, true);
+ APFloat PSmallestNormalized = APFloat::getSmallestNormalized(*S, false);
+ APFloat MSmallestNormalized = APFloat::getSmallestNormalized(*S, true);
+
+ EXPECT_TRUE(PZero.bitwiseIsEqual(abs(PZero)));
+ EXPECT_TRUE(PZero.bitwiseIsEqual(abs(MZero)));
+ EXPECT_TRUE(PNormalValue.bitwiseIsEqual(abs(PNormalValue)));
+ EXPECT_TRUE(PNormalValue.bitwiseIsEqual(abs(MNormalValue)));
+ EXPECT_TRUE(PLargestValue.bitwiseIsEqual(abs(PLargestValue)));
+ EXPECT_TRUE(PLargestValue.bitwiseIsEqual(abs(MLargestValue)));
+ EXPECT_TRUE(PSmallestValue.bitwiseIsEqual(abs(PSmallestValue)));
+ EXPECT_TRUE(PSmallestValue.bitwiseIsEqual(abs(MSmallestValue)));
+ EXPECT_TRUE(PSmallestNormalized.bitwiseIsEqual(abs(PSmallestNormalized)));
+ EXPECT_TRUE(PSmallestNormalized.bitwiseIsEqual(abs(MSmallestNormalized)));
+ }
+}
+
TEST(APFloatTest, neg) {
APFloat One = APFloat(APFloat::IEEEsingle(), "1.0");
APFloat NegOne = APFloat(APFloat::IEEEsingle(), "-1.0");
@@ -4162,6 +5063,25 @@ TEST(APFloatTest, neg) {
EXPECT_TRUE(QNaN.bitwiseIsEqual(-NegQNaN));
}
+TEST(APFloatTest, negHexFloat) { // negation must flip the sign of 1 and of zero, bit-exactly
+ for (const auto *S : HexFloatSemantics) {
+ APFloat One = APFloat(*S, "1.0");
+ APFloat NegOne = APFloat(*S, "-1.0");
+ APFloat Zero = APFloat::getZero(*S, false);
+ APFloat NegZero = APFloat::getZero(*S, true);
+
+ EXPECT_TRUE(NegOne.bitwiseIsEqual(neg(One))); // via the neg() function
+ EXPECT_TRUE(One.bitwiseIsEqual(neg(NegOne)));
+ EXPECT_TRUE(NegZero.bitwiseIsEqual(neg(Zero)));
+ EXPECT_TRUE(Zero.bitwiseIsEqual(neg(NegZero)));
+
+ EXPECT_TRUE(NegOne.bitwiseIsEqual(-One)); // same checks via unary operator-
+ EXPECT_TRUE(One.bitwiseIsEqual(-NegOne));
+ EXPECT_TRUE(NegZero.bitwiseIsEqual(-Zero));
+ EXPECT_TRUE(Zero.bitwiseIsEqual(-NegZero));
+ }
+}
+
TEST(APFloatTest, ilogb) {
EXPECT_EQ(-1074, ilogb(APFloat::getSmallest(APFloat::IEEEdouble(), false)));
EXPECT_EQ(-1074, ilogb(APFloat::getSmallest(APFloat::IEEEdouble(), true)));
@@ -4205,6 +5125,45 @@ TEST(APFloatTest, ilogb) {
ilogb(APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true)));
}
+TEST(APFloatTest, ilogbHexFloat) { // ilogb() on HexFloat values, checked for both signs
+ struct test_t { // a literal and the ilogb() result expected for it
+ const char *value;
+ int expected;
+ };
+ for (bool PosNeg : {false, true}) { // PosNeg is used as the "negative" flag below
+ EXPECT_EQ(-70, ilogb(APFloat::getSmallest(APFloat::HexFP32(), PosNeg)));
+ EXPECT_EQ(-78, ilogb(APFloat::getSmallest(APFloat::HexFP64(), PosNeg)));
+ EXPECT_EQ(-92, ilogb(APFloat::getSmallest(APFloat::HexFP128(), PosNeg)));
+ EXPECT_EQ(
+ -65, ilogb(APFloat::getSmallestNormalized(APFloat::HexFP32(), PosNeg)));
+ EXPECT_EQ(
+ -65, ilogb(APFloat::getSmallestNormalized(APFloat::HexFP64(), PosNeg)));
+ EXPECT_EQ(-65, ilogb(APFloat::getSmallestNormalized(APFloat::HexFP128(),
+ PosNeg)));
+
+ test_t tests[] = { // NOTE(review): results look like radix-16 exponents (2^-256 == 16^-64) - confirm
+ // clang-format off
+ { "0x1.fffffp-256", -64 },
+ { "0x1.00000p-256", -64 },
+ { "0x0", APFloat::IEK_Zero },
+ { "0x1", 0 },
+ // clang-format on
+ };
+ for (const auto *S : HexFloatSemantics) {
+ for (const auto &T : tests) {
+ APFloat F(*S, T.value);
+ if (F.isNegative() != PosNeg) // flip F so its sign matches PosNeg
+ F.changeSign();
+ EXPECT_EQ(T.expected, ilogb(F))
+ << " F == " << T.value << " isNegative() == " << F.isNegative();
+ }
+ }
+ EXPECT_EQ(62, ilogb(APFloat::getLargest(APFloat::HexFP32(), PosNeg)));
+ EXPECT_EQ(62, ilogb(APFloat::getLargest(APFloat::HexFP64(), PosNeg)));
+ EXPECT_EQ(62, ilogb(APFloat::getLargest(APFloat::HexFP128(), PosNeg)));
+ }
+}
+
TEST(APFloatTest, scalbn) {
const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
@@ -4376,6 +5335,58 @@ TEST(APFloatTest, scalbn) {
.bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEdouble(), "0x1p-51"), -52, RM)));
}
+TEST(APFloatTest, scalbnHexFloat) {
+ const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
+
+ for (const auto *S : HexFloatSemantics) {
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "8"), -1, RM).bitwiseIsEqual(APFloat(*S, "0.5")));
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "8"), 0, RM).bitwiseIsEqual(APFloat(*S, "8")));
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "8"), 1, RM).bitwiseIsEqual(APFloat(*S, "128")));
+
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "0"), -10000, RM).bitwiseIsEqual(APFloat(*S, "0")));
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "0"), -1, RM).bitwiseIsEqual(APFloat(*S, "0")));
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "0"), 0, RM).bitwiseIsEqual(APFloat(*S, "0")));
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "0"), 1, RM).bitwiseIsEqual(APFloat(*S, "0")));
+ EXPECT_TRUE(
+ scalbn(APFloat(*S, "0"), 10000, RM).bitwiseIsEqual(APFloat(*S, "0")));
+
+ EXPECT_TRUE(scalbn(APFloat::getSmallest(*S), -1, RM)
+ .bitwiseIsEqual(APFloat(*S, "0")));
+ EXPECT_TRUE(scalbn(APFloat(*S, "0x.1p252"), 1, RM)
+ .bitwiseIsEqual(APFloat::getLargest(*S)));
+ EXPECT_TRUE(scalbn(APFloat::getLargest(*S), 1, RM)
+ .bitwiseIsEqual(APFloat::getLargest(*S)));
+ }
+
+ EXPECT_TRUE(APFloat(APFloat::HexFP32(), "0x.1p252")
+ .bitwiseIsEqual(scalbn(
+ APFloat::getSmallest(APFloat::HexFP32()), 132, RM)));
+ EXPECT_TRUE(APFloat::getLargest(APFloat::HexFP32())
+ .bitwiseIsEqual(scalbn(
+ APFloat::getSmallest(APFloat::HexFP32()), 133, RM)));
+
+ EXPECT_TRUE(APFloat(APFloat::HexFP64(), "0x.1p252")
+ .bitwiseIsEqual(scalbn(
+ APFloat::getSmallest(APFloat::HexFP64()), 140, RM)));
+ EXPECT_TRUE(APFloat::getLargest(APFloat::HexFP64())
+ .bitwiseIsEqual(scalbn(
+ APFloat::getSmallest(APFloat::HexFP64()), 141, RM)));
+
+ EXPECT_TRUE(APFloat(APFloat::HexFP128(), "0x.1p252")
+ .bitwiseIsEqual(scalbn(
+ APFloat::getSmallest(APFloat::HexFP128()), 154, RM)));
+ EXPECT_TRUE(APFloat::getLargest(APFloat::HexFP128())
+ .bitwiseIsEqual(scalbn(
+ APFloat::getSmallest(APFloat::HexFP128()), 155, RM)));
+}
+
TEST(APFloatTest, frexp) {
const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
@@ -4502,6 +5513,122 @@ TEST(APFloatTest, frexp) {
EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-1").bitwiseIsEqual(Frac));
}
+TEST(APFloatTest, frexpHexFloat) {
+ const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
+
+ for (const auto *S : HexFloatSemantics) {
+ auto SemanticsName = APFloat::semanticsName(*S);
+ APFloat PZero(APFloat::getZero(*S, false));
+ APFloat MZero(APFloat::getZero(*S, true));
+ APFloat One(APFloat::getOne(*S, false));
+ APFloat MOne(APFloat::getOne(*S, true));
+ APFloat Two(*S, "2.0");
+ APFloat MTwo(*S, "-2.0");
+
+ APFloat SmallestNormalized(APFloat::getSmallestNormalized(*S, false));
+ APFloat NegSmallestNormalized(APFloat::getSmallestNormalized(*S, true));
+ APFloat LargestDenormal(SmallestNormalized);
+ LargestDenormal.next(true);
+ APFloat NegLargestDenormal(LargestDenormal);
+ NegLargestDenormal.changeSign();
+
+ APFloat Smallest = APFloat::getSmallest(*S, false);
+ APFloat NegSmallest = APFloat::getSmallest(*S, true);
+
+ APFloat Largest = APFloat::getLargest(*S, false);
+ APFloat NegLargest = APFloat::getLargest(*S, true);
+
+ int Exp;
+ std::string Expected;
+ APFloat Frac(*S);
+
+ Frac = frexp(PZero, Exp, RM);
+ EXPECT_EQ(0, Exp);
+ EXPECT_TRUE(Frac.isPosZero());
+
+ Frac = frexp(MZero, Exp, RM);
+ EXPECT_EQ(0, Exp);
+ EXPECT_TRUE(Frac.isNegZero());
+
+ // we use a subtraction and the isZero() predicate to avoid
+ // having to deal with the exact encoding, which is tricky
+ // for the low exponent in the 128 case.
+ Frac = frexp(One, Exp, RM);
+ EXPECT_EQ(1, Exp)
+ << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, "0.5"), RM))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+
+ Frac = frexp(MOne, Exp, RM);
+ EXPECT_EQ(1, Exp)
+ << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, "0.5"), RM))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+
+ Frac = frexp(LargestDenormal, Exp, RM);
+ EXPECT_EQ(-260, Exp)
+ << " Semantics: " << SemanticsName;
+ Expected = "0x.";
+ Expected.append(APFloat::semanticsPrecision(*S) - 1, 'f');
+ Expected.push_back('0');
+ EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, Expected), RM))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+
+ Frac = frexp(NegLargestDenormal, Exp, RM);
+ EXPECT_EQ(-260, Exp)
+ << " Semantics: " << SemanticsName;
+ Expected = "0x.";
+ Expected.append(APFloat::semanticsPrecision(*S) - 1, 'f');
+ Expected.push_back('0');
+ EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, Expected), RM))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+
+ Frac = frexp(Smallest, Exp, RM);
+ EXPECT_EQ(-256 - 4 * ((int)APFloat::semanticsPrecision(*S) - 1) - 3, Exp)
+ << " Semantics: " << APFloat::semanticsName(*S);
+ EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, "0.5"), RM))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+
+ Frac = frexp(NegSmallest, Exp, RM);
+ EXPECT_EQ(-256 - 4 * ((int)APFloat::semanticsPrecision(*S) - 1) - 3, Exp)
+ << " Semantics: " << SemanticsName;
+ EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, "0.5"), RM))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+
+ Frac = frexp(Largest, Exp, RM);
+ EXPECT_EQ(252, Exp)
+ << " Semantics: " << SemanticsName;
+ Expected = "0x.";
+ Expected.append(APFloat::semanticsPrecision(*S), 'f');
+ EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, Expected), RM))
+ << " Semantics: " << APFloat::semanticsName(*S);
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+
+ Frac = frexp(NegLargest, Exp, RM);
+ EXPECT_EQ(252, Exp)
+ << " Semantics: " << SemanticsName;
+ Expected = "0x.";
+ Expected.append(APFloat::semanticsPrecision(*S), 'f');
+ EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, Expected), RM))
+ << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero())
+ << " Semantics: " << SemanticsName;
+ }
+}
+
TEST(APFloatTest, mod) {
{
APFloat f1(APFloat::IEEEdouble(), "1.5");
@@ -7365,6 +8492,244 @@ TEST(APFloatTest, PPCDoubleDoubleConvertFromAPIntEnormous) {
}
}
+TEST(APFloatTest, HexFloatRoundToIntegral) {
+ const auto cmpEqual = llvm::APFloatBase::cmpEqual;
+
+ auto toString = [](llvm::RoundingMode rm) {
+ const char *s;
+ switch (rm) {
+ case APFloat::rmTowardZero:
+ s = "APFloat::rmTowardZero";
+ break;
+ case APFloat::rmTowardPositive:
+ s = "APFloat::rmTowardPositive";
+ break;
+ case APFloat::rmTowardNegative:
+ s = "APFloat::rmTowardNegative";
+ break;
+ case APFloat::rmNearestTiesToEven:
+ s = "APFloat::rmNearestTiesToEven";
+ break;
+ case APFloat::rmNearestTiesToAway:
+ s = "APFloat::rmNearestTiesToAway";
+ break;
+ default:
+ s = "unknown";
+ }
+ return s;
+ };
+
+ // instances of test_t describe a single test: the value to be rounded,
+ // the expected value, and the rounding mode.
+ struct test_t {
+ const char *value;
+ const char *expected_after_rounding;
+ llvm::RoundingMode rm;
+ };
+
+ // do a single test
+ auto test_it = [&](const test_t t, const fltSemantics &S) {
+ APFloat hf(S, t.value);
+ APFloat expected(S, t.expected_after_rounding);
+ hf.roundToIntegral(t.rm);
+ EXPECT_EQ(cmpEqual, expected.compare(hf))
+ << "Semantics: " << APFloat::semanticsName(S) << "\n"
+ << "value == " << t.value
+ << ", expected == " << t.expected_after_rounding
+ << ", mode == " << toString(t.rm);
+ };
+
+ // these are the tests
+ // Note that the fractions 0.25, 0.5, and 0.75 are losslessly expressible
+ // in HexFloat
+ const test_t tests[] = {
+ // clang-format off
+ { "0", "0", APFloat::rmTowardZero },
+ { "0", "0", APFloat::rmTowardPositive },
+ { "0", "0", APFloat::rmTowardNegative },
+ { "0", "0", APFloat::rmNearestTiesToEven },
+ { "0", "0", APFloat::rmNearestTiesToAway },
+
+ // The values with absolute value less than one have exponent < 1
+ { "0.25", "0", APFloat::rmTowardZero },
+ { "0.25", "1", APFloat::rmTowardPositive },
+ { "0.25", "0", APFloat::rmTowardNegative },
+ { "0.25", "0", APFloat::rmNearestTiesToEven },
+ { "0.25", "0", APFloat::rmNearestTiesToAway },
+
+ { "-0.25", "0", APFloat::rmTowardZero },
+ { "-0.25", "0", APFloat::rmTowardPositive },
+ { "-0.25", "-1", APFloat::rmTowardNegative },
+ { "-0.25", "0", APFloat::rmNearestTiesToEven },
+ { "-0.25", "0", APFloat::rmNearestTiesToAway },
+
+ { "0.5", "0", APFloat::rmTowardZero },
+ { "0.5", "1", APFloat::rmTowardPositive },
+ { "0.5", "0", APFloat::rmTowardNegative },
+ { "0.5", "0", APFloat::rmNearestTiesToEven },
+ { "0.5", "1", APFloat::rmNearestTiesToAway },
+
+ { "-0.5", "0", APFloat::rmTowardZero },
+ { "-0.5", "0", APFloat::rmTowardPositive },
+ { "-0.5", "-1", APFloat::rmTowardNegative },
+ { "-0.5", "0", APFloat::rmNearestTiesToEven },
+ { "-0.5", "-1", APFloat::rmNearestTiesToAway },
+
+ { "0.75", "0", APFloat::rmTowardZero },
+ { "0.75", "1", APFloat::rmTowardPositive },
+ { "0.75", "0", APFloat::rmTowardNegative },
+ { "0.75", "1", APFloat::rmNearestTiesToEven },
+ { "0.75", "1", APFloat::rmNearestTiesToAway },
+
+ { "-0.75", "0", APFloat::rmTowardZero },
+ { "-0.75", "0", APFloat::rmTowardPositive },
+ { "-0.75", "-1", APFloat::rmTowardNegative },
+ { "-0.75", "-1", APFloat::rmNearestTiesToEven },
+ { "-0.75", "-1", APFloat::rmNearestTiesToAway },
+
+ // exponent >= 1
+ // check how a value with an odd integer component rounds
+ { "1", "1", APFloat::rmTowardZero },
+ { "1", "1", APFloat::rmTowardPositive },
+ { "1", "1", APFloat::rmTowardNegative },
+ { "1", "1", APFloat::rmNearestTiesToEven },
+ { "1", "1", APFloat::rmNearestTiesToAway },
+
+ { "-1", "-1", APFloat::rmTowardZero },
+ { "-1", "-1", APFloat::rmTowardPositive },
+ { "-1", "-1", APFloat::rmTowardNegative },
+ { "-1", "-1", APFloat::rmNearestTiesToEven },
+ { "-1", "-1", APFloat::rmNearestTiesToAway },
+
+ { "1.25", "1", APFloat::rmTowardZero },
+ { "1.25", "2", APFloat::rmTowardPositive },
+ { "1.25", "1", APFloat::rmTowardNegative },
+ { "1.25", "1", APFloat::rmNearestTiesToEven },
+ { "1.25", "1", APFloat::rmNearestTiesToAway },
+
+ { "-1.25", "-1", APFloat::rmTowardZero },
+ { "-1.25", "-1", APFloat::rmTowardPositive },
+ { "-1.25", "-2", APFloat::rmTowardNegative },
+ { "-1.25", "-1", APFloat::rmNearestTiesToEven },
+ { "-1.25", "-1", APFloat::rmNearestTiesToAway },
+
+ { "1.5", "1", APFloat::rmTowardZero },
+ { "1.5", "2", APFloat::rmTowardPositive },
+ { "1.5", "1", APFloat::rmTowardNegative },
+ { "1.5", "2", APFloat::rmNearestTiesToEven },
+ { "1.5", "2", APFloat::rmNearestTiesToAway },
+
+ { "-1.5", "-1", APFloat::rmTowardZero },
+ { "-1.5", "-1", APFloat::rmTowardPositive },
+ { "-1.5", "-2", APFloat::rmTowardNegative },
+ { "-1.5", "-2", APFloat::rmNearestTiesToEven },
+ { "-1.5", "-2", APFloat::rmNearestTiesToAway },
+
+ { "1.75", "1", APFloat::rmTowardZero },
+ { "1.75", "2", APFloat::rmTowardPositive },
+ { "1.75", "1", APFloat::rmTowardNegative },
+ { "1.75", "2", APFloat::rmNearestTiesToEven },
+ { "1.75", "2", APFloat::rmNearestTiesToAway },
+
+ { "-1.75", "-1", APFloat::rmTowardZero },
+ { "-1.75", "-1", APFloat::rmTowardPositive },
+ { "-1.75", "-2", APFloat::rmTowardNegative },
+ { "-1.75", "-2", APFloat::rmNearestTiesToEven },
+ { "-1.75", "-2", APFloat::rmNearestTiesToAway },
+
+ // check how a value with an even integer component rounds
+ { "2", "2", APFloat::rmTowardZero },
+ { "2", "2", APFloat::rmTowardPositive },
+ { "2", "2", APFloat::rmTowardNegative },
+ { "2", "2", APFloat::rmNearestTiesToEven },
+ { "2", "2", APFloat::rmNearestTiesToAway },
+
+ { "-2", "-2", APFloat::rmTowardZero },
+ { "-2", "-2", APFloat::rmTowardPositive },
+ { "-2", "-2", APFloat::rmTowardNegative },
+ { "-2", "-2", APFloat::rmNearestTiesToEven },
+ { "-2", "-2", APFloat::rmNearestTiesToAway },
+
+ { "2.25", "2", APFloat::rmTowardZero },
+ { "2.25", "3", APFloat::rmTowardPositive },
+ { "2.25", "2", APFloat::rmTowardNegative },
+ { "2.25", "2", APFloat::rmNearestTiesToEven },
+ { "2.25", "2", APFloat::rmNearestTiesToAway },
+
+ { "-2.25", "-2", APFloat::rmTowardZero },
+ { "-2.25", "-2", APFloat::rmTowardPositive },
+ { "-2.25", "-3", APFloat::rmTowardNegative },
+ { "-2.25", "-2", APFloat::rmNearestTiesToEven },
+ { "-2.25", "-2", APFloat::rmNearestTiesToAway },
+
+ { "2.5", "2", APFloat::rmTowardZero },
+ { "2.5", "3", APFloat::rmTowardPositive },
+ { "2.5", "2", APFloat::rmTowardNegative },
+ { "2.5", "2", APFloat::rmNearestTiesToEven },
+ { "2.5", "3", APFloat::rmNearestTiesToAway },
+
+ { "-2.5", "-2", APFloat::rmTowardZero },
+ { "-2.5", "-2", APFloat::rmTowardPositive },
+ { "-2.5", "-3", APFloat::rmTowardNegative },
+ { "-2.5", "-2", APFloat::rmNearestTiesToEven },
+ { "-2.5", "-3", APFloat::rmNearestTiesToAway },
+
+ { "2.75", "2", APFloat::rmTowardZero },
+ { "2.75", "3", APFloat::rmTowardPositive },
+ { "2.75", "2", APFloat::rmTowardNegative },
+ { "2.75", "3", APFloat::rmNearestTiesToEven },
+ { "2.75", "3", APFloat::rmNearestTiesToAway },
+
+ { "-2.75", "-2", APFloat::rmTowardZero },
+ { "-2.75", "-2", APFloat::rmTowardPositive },
+ { "-2.75", "-3", APFloat::rmTowardNegative },
+ { "-2.75", "-3", APFloat::rmNearestTiesToEven },
+ { "-2.75", "-3", APFloat::rmNearestTiesToAway },
+ // clang-format on
+ };
+
+ // do the tests
+ for (const auto &t : tests) {
+ for (const auto *S : HexFloatSemantics)
+ test_it(t, *S);
+ }
+}
+TEST(APFloatTest, CompareHexFloat) {
+ const APFloat::cmpResult LT = APFloat::cmpLessThan;
+ const APFloat::cmpResult EQ = APFloat::cmpEqual;
+ const APFloat::cmpResult GT = APFloat::cmpGreaterThan;
+
+ const std::array Values{"2.0", "3.0", "-2.31", "2.4", "-1.42", "-1.4"};
+
+ /* expected results */
+ APFloat::cmpResult Expected[][Values.size()] = {
+ // clang-format off
+ /* 2.0 3.0 -2.31 2.4 -1.42 -1.4 */
+ /* 2.0 */ { EQ, LT, GT, LT, GT, GT },
+ /* 3.0 */ { GT, EQ, GT, GT, GT, GT },
+ /* -2.31 */ { LT, LT, EQ, LT, LT, LT },
+ /* 2.4 */ { GT, LT, GT, EQ, GT, GT },
+ /* -1.42 */ { LT, LT, GT, LT, EQ, LT },
+ /* -1.4 */ { LT, LT, GT, LT, GT, EQ },
+ // clang-format on
+ };
+
+ for (const auto *S : HexFloatSemantics) {
+ for (auto i = 0; i != Values.size(); i++) {
+ const APFloat LHS(*S, Values[i]);
+ for (auto j = 0; j != Values.size(); j++) {
+ const APFloat RHS(*S, Values[j]);
+ // clang-format off
+ EXPECT_EQ(Expected[i][j], LHS.compare(RHS))
+ << " Semantics: " << APFloat::semanticsName(*S) << "\n"
+ << " (i, LHS) == (" << i << ", " << LHS << ")\n"
+ << " (j, RHS) == (" << j << ", " << RHS << ")\n";
+ // clang-format on
+ }
+ }
+ }
+}
+
TEST(APFloatTest, x87Largest) {
APFloat MaxX87Val = APFloat::getLargest(APFloat::x87DoubleExtended());
EXPECT_TRUE(MaxX87Val.isLargest());
@@ -9414,8 +10779,28 @@ TEST(APFloatTest, getExactLog2) {
EXPECT_EQ(INT_MIN,
scalbn(Largest, 1, APFloat::rmNearestTiesToEven).getExactLog2());
- for (int i = MinExp - Precision + 1; i <= MaxExp; ++i) {
- EXPECT_EQ(i, scalbn(One, i, APFloat::rmNearestTiesToEven).getExactLog2());
+ int Log2OfRadix = llvm::countr_zero(APFloat::semanticsRadix(Semantics));
+
+ // HexFloat (effectively) has an implicit bit of zero. Therefore, 1 is
+ // represented by 1/16 * 16^1. It follows that if the maximum exponent
+ // allowed by the representation is MaxExp, the highest value x
+ // we can give scalbn(1, x) is x == (MaxExp - 1).
+ // (If x == MaxExp, then the representation is 1/16 * 16^1 * 16^MaxExp,
+ // which is 1/16 * 16^(MaxExp + 1), which is out of range.)
+ int MaxExpBound = One.isHexFloat() ? MaxExp - 1 : MaxExp;
+ for (int i = MinExp - Precision + 1; i <= MaxExpBound; ++i) {
+ // We need to divide the exact log2 of the value returned by scalbn by
+ // the log2 of the radix.
+ // (Note that getExactLog2 requires a radix that is a power of 2.)
+ // Let R be the radix, and r be log2(R)
+ // scalbn(1, i) returns 1 * R^i == 1 * 2 ^ (r * i) == 2 ^ (r * i)
+ // log2(1 * R^i) == log2(2 ^ (r * i))
+ // == r * log2(2^i)
+ // == r * i
+ // Therefore log2(scalbn(1, i)) == r * i
+ // Hence the division by Log2OfRadix
+ EXPECT_EQ(i, scalbn(One, i, APFloat::rmNearestTiesToEven).getExactLog2() /
+ Log2OfRadix);
}
}
}
@@ -10173,6 +11558,9 @@ TEST(APFloatTest, hasSignBitInMSB) {
EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::PPCDoubleDouble()));
EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::IEEEquad()));
EXPECT_FALSE(APFloat::hasSignBitInMSB(APFloat::Float8E8M0FNU()));
+ EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::HexFP32()));
+ EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::HexFP64()));
+ EXPECT_TRUE(APFloat::hasSignBitInMSB(APFloat::HexFP128()));
}
TEST(APFloatTest, FrexpQuietSNaN) {
>From 72f1a577f2b46ca36384a43554a79c9c45ff5a8e Mon Sep 17 00:00:00 2001
From: Ariel Burton <ariel.burton at ibm.com>
Date: Wed, 4 Feb 2026 16:40:21 -0600
Subject: [PATCH 2/4] update formatting
---
llvm/include/llvm/ADT/APFloat.h | 2 +-
llvm/lib/Support/APFloat.cpp | 18 ++--------
llvm/unittests/ADT/APFloatTest.cpp | 55 ++++++++++++------------------
3 files changed, 25 insertions(+), 50 deletions(-)
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index c60b7eccce561..66a4ef7c9368c 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -691,7 +691,7 @@ class IEEEFloat final {
/// returns the exponent
LLVM_ABI ExponentType getExponent() const;
-
+
/// returns the significand
LLVM_ABI APInt getSignificand() const;
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index caa576a26af04..81bb4408bb8a3 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -76,13 +76,7 @@ constexpr fltSemantics APFloatBase::semFloat8E5M2FNUZ = {
fltNanEncoding::NegativeZero};
constexpr fltSemantics APFloatBase::semFloat8E4M3 = {"Float8E4M3", 7, -6, 4, 8};
constexpr fltSemantics APFloatBase::semFloat8E4M3FN = {
- "Float8E4M3FN",
- 8,
- -6,
- 4,
- 8,
- 2,
- fltNonfiniteBehavior::NanOnly,
+ "Float8E4M3FN", 8, -6, 4, 8, 2, fltNonfiniteBehavior::NanOnly,
fltNanEncoding::AllOnes};
constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
"Float8E4M3FNUZ",
@@ -94,13 +88,7 @@ constexpr fltSemantics APFloatBase::semFloat8E4M3FNUZ = {
fltNonfiniteBehavior::NanOnly,
fltNanEncoding::NegativeZero};
constexpr fltSemantics APFloatBase::semFloat8E4M3B11FNUZ = {
- "Float8E4M3B11FNUZ",
- 4,
- -10,
- 4,
- 8,
- 2,
- fltNonfiniteBehavior::NanOnly,
+ "Float8E4M3B11FNUZ", 4, -10, 4, 8, 2, fltNonfiniteBehavior::NanOnly,
fltNanEncoding::NegativeZero};
constexpr fltSemantics APFloatBase::semFloat8E3M4 = {"Float8E3M4", 3, -2, 5, 8};
constexpr fltSemantics APFloatBase::semFloatTF32 = {"FloatTF32", 127, -126, 11,
@@ -7922,7 +7910,7 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
return;
}
if (usesLayout<DoubleAPFloat>(Semantics)) {
- const fltSemantics& S = F.getSemantics();
+ const fltSemantics &S = F.getSemantics();
new (&Double) DoubleAPFloat(Semantics, APFloat(std::move(F), S),
APFloat(APFloatBase::IEEEdouble()));
return;
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index f76251c88a8fa..bc675ff216220 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1506,7 +1506,10 @@ TEST(APFloatTest, fromZeroDecimalLargeExponentString) {
{SR("0e1234"), SR("+0e1234"), SR("-0e1234"), SR("0e+1234"),
SR("+0e+1234"), SR("-0e+1234"), SR("0e-1234"), SR("+0e-1234"),
SR("-0e-1234"), SR("000.0000e1234"), SR("000.0000e-1234"),
- SR("0e1234" "\0" "2", 6) }) {
+ SR("0e1234"
+ "\0"
+ "2",
+ 6)}) {
const bool ExpectNegative = ('-' == s[0]);
// set to something known not to be zero so we know we are
// converting the string
@@ -1713,8 +1716,7 @@ TEST(APFloatTest, fromDecimalString) {
{ "1.1", "1.1e" }, { "1.1", "+1.1e" }, { "-1.1", "-1.1e" },
{ "1", "1e+" }, { "1", "1e-" }, { "0.1", ".1e" },
{ "0.1", ".1e+" }, { "0.1", ".1e-" }, { "1", "1.0e" },
- { "1", "1.0e+" }, { "1", "1.0e-" }
- // clang-format on
+ { "1", "1.0e+" }, { "1", "1.0e-" } // clang-format on
};
bool HasError;
@@ -1731,9 +1733,8 @@ TEST(APFloatTest, fromDecimalString) {
EXPECT_TRUE(HFP.isZero());
HasError = !HFP.convertFromString(test.value, APFloat::rmTowardZero);
EXPECT_FALSE(HasError);
- EXPECT_TRUE(HFP.isNonZero())
- << " Semantics: " << SemanticsName << "\n"
- << " value is " << test.value;
+ EXPECT_TRUE(HFP.isNonZero()) << " Semantics: " << SemanticsName << "\n"
+ << " value is " << test.value;
EXPECT_EQ(ExpectNegative, HFP.isNegative())
<< " Semantics: " << SemanticsName << "\n"
<< " value is " << test.value;
@@ -5554,78 +5555,64 @@ TEST(APFloatTest, frexpHexFloat) {
// having to deal with the exact encoding, which is tricky
// for the low exponent in the 128 case.
Frac = frexp(One, Exp, RM);
- EXPECT_EQ(1, Exp)
- << " Semantics: " << SemanticsName;
+ EXPECT_EQ(1, Exp) << " Semantics: " << SemanticsName;
EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, "0.5"), RM))
<< " Semantics: " << SemanticsName;
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
Frac = frexp(MOne, Exp, RM);
- EXPECT_EQ(1, Exp)
- << " Semantics: " << SemanticsName;
+ EXPECT_EQ(1, Exp) << " Semantics: " << SemanticsName;
EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, "0.5"), RM))
<< " Semantics: " << SemanticsName;
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
Frac = frexp(LargestDenormal, Exp, RM);
- EXPECT_EQ(-260, Exp)
- << " Semantics: " << SemanticsName;
+ EXPECT_EQ(-260, Exp) << " Semantics: " << SemanticsName;
Expected = "0x.";
Expected.append(APFloat::semanticsPrecision(*S) - 1, 'f');
Expected.push_back('0');
EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, Expected), RM))
<< " Semantics: " << SemanticsName;
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
Frac = frexp(NegLargestDenormal, Exp, RM);
- EXPECT_EQ(-260, Exp)
- << " Semantics: " << SemanticsName;
+ EXPECT_EQ(-260, Exp) << " Semantics: " << SemanticsName;
Expected = "0x.";
Expected.append(APFloat::semanticsPrecision(*S) - 1, 'f');
Expected.push_back('0');
EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, Expected), RM))
<< " Semantics: " << SemanticsName;
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
Frac = frexp(Smallest, Exp, RM);
EXPECT_EQ(-256 - 4 * ((int)APFloat::semanticsPrecision(*S) - 1) - 3, Exp)
<< " Semantics: " << APFloat::semanticsName(*S);
EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, "0.5"), RM))
<< " Semantics: " << SemanticsName;
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
Frac = frexp(NegSmallest, Exp, RM);
EXPECT_EQ(-256 - 4 * ((int)APFloat::semanticsPrecision(*S) - 1) - 3, Exp)
<< " Semantics: " << SemanticsName;
EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, "0.5"), RM))
<< " Semantics: " << SemanticsName;
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
Frac = frexp(Largest, Exp, RM);
- EXPECT_EQ(252, Exp)
- << " Semantics: " << SemanticsName;
+ EXPECT_EQ(252, Exp) << " Semantics: " << SemanticsName;
Expected = "0x.";
Expected.append(APFloat::semanticsPrecision(*S), 'f');
EXPECT_EQ(APFloat::opOK, Frac.subtract(APFloat(*S, Expected), RM))
<< " Semantics: " << APFloat::semanticsName(*S);
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
- Frac = frexp(NegLargest, Exp, RM);
- EXPECT_EQ(252, Exp)
+ Frac = frexp(NegLargest, Exp, RM); EXPECT_EQ(252, Exp)
<< " Semantics: " << SemanticsName;
Expected = "0x.";
Expected.append(APFloat::semanticsPrecision(*S), 'f');
EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, Expected), RM))
<< " Semantics: " << SemanticsName;
- EXPECT_TRUE(Frac.isZero())
- << " Semantics: " << SemanticsName;
+ EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
}
}
>From 38904c8aae0bd6886d29bc2e9f69053ac8294926 Mon Sep 17 00:00:00 2001
From: Ariel Burton <ariel.burton at ibm.com>
Date: Fri, 6 Feb 2026 10:35:13 -0600
Subject: [PATCH 3/4] make APInt::compare private again, and fix typos
---
llvm/include/llvm/ADT/APInt.h | 8 ++++----
llvm/lib/Support/APFloat.cpp | 12 ++++++++----
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index b1d9e43961447..9193b5f8994e0 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1923,10 +1923,6 @@ class [[nodiscard]] APInt {
return tcAddPart(dst, 1, parts);
}
- /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
- /// to, or greater than RHS.
- LLVM_ABI int compare(const APInt &RHS) const LLVM_READONLY;
-
/// Decrement a bignum in-place. Return the borrow flag.
static WordType tcDecrement(WordType *dst, unsigned parts) {
return tcSubtractPart(dst, 1, parts);
@@ -2110,6 +2106,10 @@ class [[nodiscard]] APInt {
/// out-of-line slow case for operator^=.
LLVM_ABI void xorAssignSlowCase(const APInt &RHS);
+ /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
+ /// to, or greater than RHS.
+ LLVM_ABI int compare(const APInt &RHS) const LLVM_READONLY;
+
/// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
/// to, or greater than RHS.
LLVM_ABI int compareSigned(const APInt &RHS) const LLVM_READONLY;
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 81bb4408bb8a3..6f35eefb29f9e 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -59,7 +59,7 @@ constexpr fltSemantics APFloatBase::semIEEEhalf = {"IEEEhalf", 15, -14, 11, 16};
constexpr fltSemantics APFloatBase::semBFloat = {"BFloat", 127, -126, 8, 16};
constexpr fltSemantics APFloatBase::semIEEEsingle = {"IEEESingle", 127, -126,
24, 32};
-constexpr fltSemantics APFloatBase::semIEEEdouble = {"IEEEdoublle", 1023, -1022,
+constexpr fltSemantics APFloatBase::semIEEEdouble = {"IEEEdouble", 1023, -1022,
53, 64};
constexpr fltSemantics APFloatBase::semIEEEquad = {"IEEEquad", 16383, -16382,
113, 128};
@@ -5933,7 +5933,7 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
std::move(Second));
}
-// class HexFloatArith implements HFP arithemtic using the conventions
+// class HexFloatArith implements HFP arithmetic using the conventions
// and approaches of the arith library, and matches the behaviour
// of the hardware.
class HexFloatArith {
@@ -6953,8 +6953,12 @@ cmpResult HexFloat::compareAbsoluteValue(const HexFloat &rhs) const {
/* If exponents are equal, do an unsigned bignum comparison of the
significands. */
- if (compare == 0)
- compare = significand.compare(rhs.significand);
+ if (compare == 0) {
+ if (significand.ugt(rhs.significand))
+ compare = 1;
+ else if (significand.ult(rhs.significand))
+ compare = -1;
+ }
if (compare > 0)
return cmpGreaterThan;
>From 804100ee766e1bd0038da6eeb615ba866f94b347 Mon Sep 17 00:00:00 2001
From: Ariel Burton <ariel.burton at ibm.com>
Date: Fri, 13 Feb 2026 12:41:24 -0600
Subject: [PATCH 4/4] changes in response to comments
---
llvm/lib/Support/APFloat.cpp | 4 ++--
llvm/unittests/ADT/APFloatTest.cpp | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 6f35eefb29f9e..79005b2fd87e0 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -5934,8 +5934,8 @@ DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
}
// class HexFloatArith implements HFP arithmetic using the conventions
-// and approaches of the arith library, and matches the behaviour
-// of the hardware.
+// and approaches of the arith library used by the IBM XL compiler,
+// and matches the behaviour of the hardware.
class HexFloatArith {
public:
struct value_t {
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index bc675ff216220..e3d091675d984 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -5606,8 +5606,8 @@ TEST(APFloatTest, frexpHexFloat) {
<< " Semantics: " << APFloat::semanticsName(*S);
EXPECT_TRUE(Frac.isZero()) << " Semantics: " << SemanticsName;
- Frac = frexp(NegLargest, Exp, RM); EXPECT_EQ(252, Exp)
- << " Semantics: " << SemanticsName;
+ Frac = frexp(NegLargest, Exp, RM);
+ EXPECT_EQ(252, Exp) << " Semantics: " << SemanticsName;
Expected = "0x.";
Expected.append(APFloat::semanticsPrecision(*S), 'f');
EXPECT_EQ(APFloat::opOK, Frac.add(APFloat(*S, Expected), RM))
More information about the cfe-commits
mailing list