[llvm] 9fdb5e1 - [APFloat] Properly implement next for DoubleAPFloat
David Majnemer via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 12:38:30 PDT 2025
Author: David Majnemer
Date: 2025-08-01T12:34:33-07:00
New Revision: 9fdb5e1fef223c777d9cdf2f6d5eaffb33c59b0d
URL: https://github.com/llvm/llvm-project/commit/9fdb5e1fef223c777d9cdf2f6d5eaffb33c59b0d
DIFF: https://github.com/llvm/llvm-project/commit/9fdb5e1fef223c777d9cdf2f6d5eaffb33c59b0d.diff
LOG: [APFloat] Properly implement next for DoubleAPFloat
Rather than converting to the legacy 106-bit format, perform next() on the
low APFloat. Of course, we need to renormalize the two APFloats if
either of the two constraints is violated:
1. abs(low) <= ulp(high)/2
2. high = rtne(high + low)
Should renormalization be needed, it will increment the high component
and set low to the smallest value which obeys these rules.
Added:
Modified:
llvm/lib/Support/APFloat.cpp
llvm/unittests/ADT/APFloatTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 5e0b29ffb2590..46084c5b7fb92 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -900,6 +900,30 @@ writeSignedDecimal (char *dst, int value)
return dst;
}
+// Compute the ULP of the input using Harrison's definition, as described in:
+// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504,
+// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503
+static APFloat harrisonUlp(const APFloat &X) {
+ const fltSemantics &Sem = X.getSemantics();
+ switch (X.getCategory()) {
+ case APFloat::fcNaN:
+ return APFloat::getQNaN(Sem);
+ case APFloat::fcInfinity:
+ return APFloat::getInf(Sem);
+ case APFloat::fcZero:
+ return APFloat::getSmallest(Sem);
+ case APFloat::fcNormal:
+ break;
+ }
+ if (X.isDenormal() || X.isSmallestNormalized())
+ return APFloat::getSmallest(Sem);
+ int Exp = ilogb(X);
+ if (X.getExactLog2() != INT_MIN)
+ Exp -= 1;
+ return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1),
+ APFloat::rmNearestTiesToEven);
+}
+
namespace detail {
/* Constructors. */
void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
@@ -5306,12 +5330,110 @@ Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
return Ret;
}
+// The double-double lattice of values corresponds to numbers which obey:
+// - abs(lo) <= 1/2 * ulp(hi)
+// - roundTiesToEven(hi + lo) == hi
+//
+// nextUp must choose the smallest output > input that follows these rules.
+// nextDown must choose the largest output < input that follows these rules.
APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
- APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
- auto Ret = Tmp.next(nextDown);
- *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
- return Ret;
+ // nextDown(x) = -nextUp(-x)
+ if (nextDown) {
+ changeSign();
+ APFloat::opStatus Result = next(/*nextDown=*/false);
+ changeSign();
+ return Result;
+ }
+ switch (getCategory()) {
+ case fcInfinity:
+ // nextUp(+inf) = +inf
+ // nextUp(-inf) = -getLargest()
+ if (isNegative())
+ makeLargest(true);
+ return opOK;
+
+ case fcNaN:
+ // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
+ // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
+ // change the payload.
+ if (getFirst().isSignaling()) {
+ // For consistency, propagate the sign of the sNaN to the qNaN.
+ makeNaN(false, isNegative(), nullptr);
+ return opInvalidOp;
+ }
+ return opOK;
+
+ case fcZero:
+ // nextUp(+/-0) = +getSmallest()
+ makeSmallest(false);
+ return opOK;
+
+ case fcNormal:
+ break;
+ }
+
+ const APFloat &HiOld = getFirst();
+ const APFloat &LoOld = getSecond();
+
+ APFloat NextLo = LoOld;
+ NextLo.next(/*nextDown=*/false);
+
+ // We want to admit values where:
+ // 1. abs(Lo) <= ulp(Hi)/2
+ // 2. Hi == RTNE(Hi + Lo)
+ auto InLattice = [](const APFloat &Hi, const APFloat &Lo) {
+ return Hi + Lo == Hi;
+ };
+
+ // Check if (HiOld, nextUp(LoOld)) is in the lattice.
+ if (InLattice(HiOld, NextLo)) {
+ // Yes, the result is (HiOld, nextUp(LoOld)).
+ Floats[1] = std::move(NextLo);
+
+ // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum
+ // value is defined to have exactly 106 bits of precision. This limitation
+ // results in semPPCDoubleDouble being unable to reach its maximum canonical
+ // value.
+ DoubleAPFloat Largest{*Semantics, uninitialized};
+ Largest.makeLargest(/*Neg=*/false);
+ if (compare(Largest) == cmpGreaterThan)
+ makeInf(/*Neg=*/false);
+
+ return opOK;
+ }
+
+ // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the
+ // correct result. We know the new hi component will be nextUp(HiOld) but our
+ // lattice rules make it a little ambiguous what the correct NextLo must be.
+ APFloat NextHi = HiOld;
+ NextHi.next(/*nextDown=*/false);
+
+ // nextUp(getLargest()) == INFINITY
+ if (NextHi.isInfinity()) {
+ makeInf(/*Neg=*/false);
+ return opOK;
+ }
+
+ // IEEE 754-2019 5.3.1:
+ // "If x is the negative number of least magnitude in x's format, nextUp(x) is
+ // -0."
+ if (NextHi.isZero()) {
+ makeZero(/*Neg=*/true);
+ return opOK;
+ }
+
+ // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to
+ // negative infinity as possible.
+ NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero));
+ if (!InLattice(NextHi, NextLo))
+ // RTNE may require abs(Lo) < ulp(NextHi) / 2, so we bump NextLo upward.
+ NextLo.next(/*nextDown=*/false);
+
+ Floats[0] = std::move(NextHi);
+ Floats[1] = std::move(NextLo);
+
+ return opOK;
}
APFloat::opStatus
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 7a5fd83cd9581..9609e8e22a3ed 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -5540,6 +5540,287 @@ TEST(APFloatTest, PPCDoubleDoubleFrexp) {
EXPECT_EQ(0x3c98000000000000ull, Result.bitcastToAPInt().getRawData()[1]);
}
+TEST(APFloatTest, PPCDoubleDoubleNext) {
+ auto NextUp = [](APFloat X) {
+ X.next(/*nextDown=*/false);
+ return X;
+ };
+
+ auto NextDown = [](APFloat X) {
+ X.next(/*nextDown=*/true);
+ return X;
+ };
+
+ auto Zero = [] {
+ return APFloat::getZero(APFloat::IEEEdouble());
+ };
+
+ auto One = [] {
+ return APFloat::getOne(APFloat::IEEEdouble());
+ };
+
+ // 0x1p-1074
+ auto MinSubnormal = [] {
+ return APFloat::getSmallest(APFloat::IEEEdouble());
+ };
+
+ // 2^-52
+ auto Eps = [&] {
+ const fltSemantics &Sem = APFloat::IEEEdouble();
+ return scalbn(One(), 1 - APFloat::semanticsPrecision(Sem),
+ APFloat::rmNearestTiesToEven);
+ };
+
+ // 2^-53
+ auto EpsNeg = [&] { return scalbn(Eps(), -1, APFloat::rmNearestTiesToEven); };
+
+ auto MakeDoubleAPFloat = [](auto Hi, auto Lo) {
+ APFloat HiFloat{APFloat::IEEEdouble(), APFloat::uninitialized};
+ if constexpr (std::is_same_v<decltype(Hi), APFloat>) {
+ HiFloat = Hi;
+ } else {
+ HiFloat = {APFloat::IEEEdouble(), Hi};
+ }
+
+ APFloat LoFloat{APFloat::IEEEdouble(), APFloat::uninitialized};
+ if constexpr (std::is_same_v<decltype(Lo), APFloat>) {
+ LoFloat = Lo;
+ } else {
+ LoFloat = {APFloat::IEEEdouble(), Lo};
+ }
+
+ APInt Bits = LoFloat.bitcastToAPInt().concat(HiFloat.bitcastToAPInt());
+ return APFloat(APFloat::PPCDoubleDouble(), Bits);
+ };
+ APFloat Test(APFloat::PPCDoubleDouble(), APFloat::uninitialized);
+ APFloat Expected(APFloat::PPCDoubleDouble(), APFloat::uninitialized);
+
+ // 1. Test Special Case Values.
+ //
+ // Test all special values for nextUp and nextDown prescribed by IEEE-754R
+ // 2008. These are:
+ // 1. +inf
+ // 2. -inf
+ // 3. getLargest()
+ // 4. -getLargest()
+ // 5. getSmallest()
+ // 6. -getSmallest()
+ // 7. qNaN
+ // 8. sNaN
+ // 9. +0
+ // 10. -0
+
+ // nextUp(+inf) = +inf.
+ Test = APFloat::getInf(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.isPosInfinity());
+ EXPECT_TRUE(!Test.isNegative());
+
+ // nextDown(+inf) = -nextUp(-inf) = -(-getLargest()) = getLargest()
+ Test = APFloat::getInf(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(true), APFloat::opOK);
+ EXPECT_FALSE(Test.isNegative());
+ EXPECT_TRUE(Test.isLargest());
+
+ // nextUp(-inf) = -getLargest()
+ Test = APFloat::getInf(APFloat::PPCDoubleDouble(), true);
+ Expected = APFloat::getLargest(APFloat::PPCDoubleDouble(), true);
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.isNegative());
+ EXPECT_TRUE(Test.isLargest());
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // nextDown(-inf) = -nextUp(+inf) = -(+inf) = -inf.
+ Test = APFloat::getInf(APFloat::PPCDoubleDouble(), true);
+ Expected = APFloat::getInf(APFloat::PPCDoubleDouble(), true);
+ EXPECT_EQ(Test.next(true), APFloat::opOK);
+ EXPECT_TRUE(Test.isNegInfinity());
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // nextUp(getLargest()) = +inf
+ Test = APFloat::getLargest(APFloat::PPCDoubleDouble(), false);
+ Expected = APFloat::getInf(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.isPosInfinity());
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // nextUp(-getSmallest()) = -0.
+ Test = APFloat::getSmallest(Test.getSemantics(), /*Neg=*/true);
+ Expected = APFloat::getZero(APFloat::PPCDoubleDouble(), true);
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.isNegZero());
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // nextDown(getSmallest()) = -nextUp(-getSmallest()) = -(-0) = +0.
+ Test = APFloat::getSmallest(Test.getSemantics(), /*Neg=*/false);
+ EXPECT_EQ(Test.next(true), APFloat::opOK);
+ EXPECT_TRUE(Test.isPosZero());
+
+ // nextDown(-getLargest()) = -nextUp(getLargest()) = -(inf) = -inf.
+ Test = APFloat::getLargest(APFloat::PPCDoubleDouble(), true);
+ EXPECT_EQ(Test.next(true), APFloat::opOK);
+ EXPECT_TRUE(Test.isNegInfinity());
+
+ // nextUp(qNaN) = qNaN
+ Test = APFloat::getQNaN(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.isNaN());
+ EXPECT_FALSE(Test.isSignaling());
+
+ // nextDown(qNaN) = qNaN
+ Test = APFloat::getQNaN(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(true), APFloat::opOK);
+ EXPECT_TRUE(Test.isNaN());
+ EXPECT_FALSE(Test.isSignaling());
+
+ // nextUp(sNaN) = qNaN
+ Test = APFloat::getSNaN(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(false), APFloat::opInvalidOp);
+ EXPECT_TRUE(Test.isNaN());
+ EXPECT_FALSE(Test.isSignaling());
+
+ // nextDown(sNaN) = qNaN
+ Test = APFloat::getSNaN(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(true), APFloat::opInvalidOp);
+ EXPECT_TRUE(Test.isNaN());
+ EXPECT_FALSE(Test.isSignaling());
+
+ // nextUp(+0) = +getSmallest()
+ Test = APFloat::getZero(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_FALSE(Test.isNegative());
+ EXPECT_TRUE(Test.isSmallest());
+
+ // nextDown(+0) = -nextUp(-0) = -getSmallest()
+ Test = APFloat::getZero(APFloat::PPCDoubleDouble(), false);
+ EXPECT_EQ(Test.next(true), APFloat::opOK);
+ EXPECT_TRUE(Test.isNegative());
+ EXPECT_TRUE(Test.isSmallest());
+
+ // nextUp(-0) = +getSmallest()
+ Test = APFloat::getZero(APFloat::PPCDoubleDouble(), true);
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_FALSE(Test.isNegative());
+ EXPECT_TRUE(Test.isSmallest());
+
+ // nextDown(-0) = -nextUp(0) = -getSmallest()
+ Test = APFloat::getZero(APFloat::PPCDoubleDouble(), true);
+ EXPECT_EQ(Test.next(true), APFloat::opOK);
+ EXPECT_TRUE(Test.isNegative());
+ EXPECT_TRUE(Test.isSmallest());
+
+ // 2. Cases where the lo APFloat is zero.
+
+ // 2a. |hi| < 2*DBL_MIN_NORMAL (DD precision == D precision)
+ Test = APFloat(APFloat::PPCDoubleDouble(), "0x1.fffffffffffffp-1022");
+ Expected = APFloat(APFloat::PPCDoubleDouble(), "0x1p-1021");
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual);
+
+ // 2b. |hi| >= 2*DBL_MIN_NORMAL (DD precision > D precision)
+ // Test at hi = 1.0, lo = 0.
+ Test = MakeDoubleAPFloat(One(), Zero());
+ Expected = MakeDoubleAPFloat(One(), MinSubnormal());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // Test at hi = -1.0. delta = 2^-1074 (positive, moving towards +Inf).
+ Test = MakeDoubleAPFloat(-One(), Zero());
+ Expected = MakeDoubleAPFloat(-One(), MinSubnormal());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // Testing the boundary where calculated delta equals DBL_TRUE_MIN.
+ // Requires ilogb(hi) = E = -968.
+ // delta = 2^(-968 - 106) = 2^-1074 = DBL_TRUE_MIN.
+ Test = MakeDoubleAPFloat("0x1p-968", Zero());
+ Expected = MakeDoubleAPFloat("0x1p-968", MinSubnormal());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // Testing below the boundary (E < -968). Delta clamps to DBL_TRUE_MIN.
+ Test = MakeDoubleAPFloat("0x1p-969", Zero());
+ Expected = MakeDoubleAPFloat("0x1p-969", MinSubnormal());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // 3. Standard Increment (No rollover)
+ // hi=1.0, lo=2^-1074.
+ Test = MakeDoubleAPFloat(One(), MinSubnormal());
+ Expected = MakeDoubleAPFloat(One(), NextUp(MinSubnormal()));
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // Incrementing negative lo.
+ Test = MakeDoubleAPFloat(One(), -MinSubnormal());
+ Expected = MakeDoubleAPFloat(One(), Zero());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual);
+
+ // Crossing lo=0. NOTE(review): same input and expectation as the case above.
+ Test = MakeDoubleAPFloat(One(), -MinSubnormal());
+ Expected = MakeDoubleAPFloat(One(), Zero());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual);
+
+ // 4. Rollover Cases around 1.0 (Positive hi)
+ // hi=1.0, lo=nextDown(2^-53).
+ Test = MakeDoubleAPFloat(One(), NextDown(EpsNeg()));
+ EXPECT_FALSE(Test.isDenormal());
+ Expected = MakeDoubleAPFloat(One(), EpsNeg());
+ EXPECT_FALSE(Test.isDenormal());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+
+ // Input: (1, ulp(1)/2). nextUp(lo) lies past the midpoint, so rollover occurs.
+ // Can't naively increment lo:
+ // RTNE(0x1p+0 + 0x1.0000000000001p-53) == 0x1.0000000000001p+0.
+ // Can't naively TwoSum(0x1p+0, nextUp(0x1p-53)):
+ // It gives {nextUp(0x1p+0), nextUp(nextUp(-0x1p-53))} but the next
+ // number should be {nextUp(0x1p+0), nextUp(-0x1p-53)}.
+ Test = MakeDoubleAPFloat(One(), EpsNeg());
+ EXPECT_FALSE(Test.isDenormal());
+ Expected = MakeDoubleAPFloat(NextUp(One()), NextUp(-EpsNeg()));
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+ EXPECT_FALSE(Test.isDenormal());
+
+ // hi = nextDown(1), lo = nextDown(0x1p-54)
+ Test = MakeDoubleAPFloat(NextDown(One()), NextDown(APFloat(0x1p-54)));
+ EXPECT_FALSE(Test.isDenormal());
+ Expected = MakeDoubleAPFloat(One(), APFloat(-0x1p-54));
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+ EXPECT_FALSE(Test.isDenormal());
+
+ // 5. Negative Rollover (Moving towards Zero / +Inf)
+
+ // hi = -1, lo = nextDown(0x1p-54)
+ Test = MakeDoubleAPFloat(APFloat(-1.0), NextDown(APFloat(0x1p-54)));
+ EXPECT_FALSE(Test.isDenormal());
+ Expected = MakeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54));
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+ EXPECT_FALSE(Test.isDenormal());
+
+ // hi = -1, lo = 0x1p-54
+ Test = MakeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54));
+ EXPECT_FALSE(Test.isDenormal());
+ Expected =
+ MakeDoubleAPFloat(NextUp(APFloat(-1.0)), NextUp(APFloat(-0x1p-54)));
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+ EXPECT_FALSE(Test.isDenormal());
+
+ // 6. Rollover across Power of 2 boundary (Exponent change)
+ Test = MakeDoubleAPFloat(NextDown(APFloat(2.0)), NextDown(EpsNeg()));
+ EXPECT_FALSE(Test.isDenormal());
+ Expected = MakeDoubleAPFloat(APFloat(2.0), -EpsNeg());
+ EXPECT_EQ(Test.next(false), APFloat::opOK);
+ EXPECT_TRUE(Test.bitwiseIsEqual(Expected));
+ EXPECT_FALSE(Test.isDenormal());
+}
+
TEST(APFloatTest, x87Largest) {
APFloat MaxX87Val = APFloat::getLargest(APFloat::x87DoubleExtended());
EXPECT_TRUE(MaxX87Val.isLargest());
More information about the llvm-commits
mailing list