[llvm] f6d143f - [APFloat] Properly implement frexp(DoubleAPFloat)

David Majnemer via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 12 17:08:07 PDT 2025


Author: David Majnemer
Date: 2025-08-12T17:03:27-07:00
New Revision: f6d143fd1f649a6aae826307faeb62992268e9f0

URL: https://github.com/llvm/llvm-project/commit/f6d143fd1f649a6aae826307faeb62992268e9f0
DIFF: https://github.com/llvm/llvm-project/commit/f6d143fd1f649a6aae826307faeb62992268e9f0.diff

LOG: [APFloat] Properly implement frexp(DoubleAPFloat)

The prior implementation did not consider that the Lo component may
underflow when it is scaled.  This means that we need to carefully
handle cases such as binade crossings and rounding under
roundTowardZero when Hi and Lo have different signs.

Particularly annoying is roundTiesToAway when Hi and Lo have different
signs.  It basically requires us to implement roundTiesTowardZero.

Added: 
    

Modified: 
    llvm/include/llvm/ADT/APFloat.h
    llvm/lib/Support/APFloat.cpp
    llvm/unittests/ADT/APFloatTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index a67c05ef2d626..5e59ca9950226 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -787,17 +787,7 @@ class IEEEFloat final {
 };
 
 LLVM_ABI hash_code hash_value(const IEEEFloat &Arg);
-/// Returns the exponent of the internal representation of the APFloat.
-///
-/// Because the radix of APFloat is 2, this is equivalent to floor(log2(x)).
-/// For special APFloat values, this returns special error codes:
-///
-///   NaN -> \c IEK_NaN
-///   0   -> \c IEK_Zero
-///   Inf -> \c IEK_Inf
-///
 LLVM_ABI int ilogb(const IEEEFloat &Arg);
-/// Returns: X * 2^Exp for integral exponents.
 LLVM_ABI IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode);
 LLVM_ABI IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM);
 
@@ -1529,13 +1519,7 @@ class APFloat : public APFloatBase {
   }
 
   LLVM_ABI friend hash_code hash_value(const APFloat &Arg);
-  friend int ilogb(const APFloat &Arg) {
-    if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
-      return ilogb(Arg.getIEEE());
-    if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
-      return ilogb(Arg.getIEEE());
-    llvm_unreachable("Unexpected semantics");
-  }
+  friend int ilogb(const APFloat &Arg);
   friend APFloat scalbn(APFloat X, int Exp, roundingMode RM);
   friend APFloat frexp(const APFloat &X, int &Exp, roundingMode RM);
   friend IEEEFloat;
@@ -1550,6 +1534,25 @@ static_assert(sizeof(APFloat) == sizeof(detail::IEEEFloat),
 /// These additional declarations are required in order to compile LLVM with IBM
 /// xlC compiler.
 LLVM_ABI hash_code hash_value(const APFloat &Arg);
+
+/// Returns the exponent of the internal representation of the APFloat.
+///
+/// Because the radix of APFloat is 2, this is equivalent to floor(log2(x)).
+/// For special APFloat values, this returns special error codes:
+///
+///   NaN -> \c IEK_NaN
+///   0   -> \c IEK_Zero
+///   Inf -> \c IEK_Inf
+///
+inline int ilogb(const APFloat &Arg) {
+  if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
+    return ilogb(Arg.U.IEEE);
+  if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
+    return ilogb(Arg.U.Double);
+  llvm_unreachable("Unexpected semantics");
+}
+
+/// Returns: X * 2^Exp for integral exponents.
 inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) {
   if (APFloat::usesLayout<detail::IEEEFloat>(X.getSemantics()))
     return APFloat(scalbn(X.U.IEEE, Exp, RM), X.getSemantics());

diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 3352b9e5d6f9f..529d00201b9d4 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -5749,16 +5749,15 @@ int DoubleAPFloat::getExactLog2Abs() const {
   return getFirst().getExactLog2Abs();
 }
 
-int ilogb(const DoubleAPFloat& Arg) {
-  const APFloat& Hi = Arg.getFirst();
-  const APFloat& Lo = Arg.getSecond();
+int ilogb(const DoubleAPFloat &Arg) {
+  const APFloat &Hi = Arg.getFirst();
+  const APFloat &Lo = Arg.getSecond();
   int IlogbResult = ilogb(Hi);
   // Zero and non-finite values can delegate to ilogb(Hi).
   if (Arg.getCategory() != fcNormal)
     return IlogbResult;
   // If Lo can't change the binade, we can delegate to ilogb(Hi).
-  if (Lo.isZero() ||
-      Hi.isNegative() == Lo.isNegative())
+  if (Lo.isZero() || Hi.isNegative() == Lo.isNegative())
     return IlogbResult;
   if (Hi.getExactLog2Abs() == INT_MIN)
     return IlogbResult;
@@ -5777,10 +5776,101 @@ DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
                     APFloat::roundingMode RM) {
   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
-  APFloat First = frexp(Arg.Floats[0], Exp, RM);
-  APFloat Second = Arg.Floats[1];
-  if (Arg.getCategory() == APFloat::fcNormal)
-    Second = scalbn(Second, -Exp, RM);
+
+  // Get the unbiased exponent e of the number, where |Arg| = m * 2^e for m in
+  // [1.0, 2.0).
+  Exp = ilogb(Arg);
+
+  // For NaNs, quiet any signaling NaN and return the result, as per standard
+  // practice.
+  if (Exp == APFloat::IEK_NaN) {
+    DoubleAPFloat Quiet{Arg};
+    Quiet.getFirst().makeQuiet();
+    return Quiet;
+  }
+
+  // For infinity, return it unchanged. The exponent remains IEK_Inf.
+  if (Exp == APFloat::IEK_Inf)
+    return Arg;
+
+  // For zero, the fraction is zero and the standard requires the exponent be 0.
+  if (Exp == APFloat::IEK_Zero) {
+    Exp = 0;
+    return Arg;
+  }
+
+  const APFloat &Hi = Arg.getFirst();
+  const APFloat &Lo = Arg.getSecond();
+
+  // frexp requires the fraction's absolute value to be in [0.5, 1.0).
+  // ilogb provides an exponent for an absolute value in [1.0, 2.0).
+  // Increment the exponent to ensure the fraction is in the correct range.
+  ++Exp;
+
+  const bool SignsDisagree = Hi.isNegative() != Lo.isNegative();
+  APFloat Second = Lo;
+  if (Arg.getCategory() == APFloat::fcNormal && Lo.isFiniteNonZero()) {
+    roundingMode LoRoundingMode;
+    // The interpretation of rmTowardZero depends on the sign of the combined
+    // Arg rather than the sign of the component.
+    if (RM == rmTowardZero)
+      LoRoundingMode = Arg.isNegative() ? rmTowardPositive : rmTowardNegative;
+    // For rmNearestTiesToAway, we face a similar problem. If signs disagree,
+    // Lo is a correction *toward* zero relative to Hi. Rounding Lo
+    // "away from zero" based on its own sign would move the value in the
+    // wrong direction. As a safe proxy, we use rmNearestTiesToEven, which is
+    // direction-agnostic. We only need to bother with this if Lo is scaled
+    // down.
+    else if (RM == rmNearestTiesToAway && SignsDisagree && Exp > 0)
+      LoRoundingMode = rmNearestTiesToEven;
+    else
+      LoRoundingMode = RM;
+    Second = scalbn(Lo, -Exp, LoRoundingMode);
+    // The rmNearestTiesToEven proxy is correct most of the time, but it
+    // differs from rmNearestTiesToAway when the scaled value of Lo is an
+    // exact midpoint.
+    // NOTE: This is morally equivalent to roundTiesTowardZero.
+    if (RM == rmNearestTiesToAway && LoRoundingMode == rmNearestTiesToEven) {
+      // Re-scale the result back to check if rounding occurred.
+      const APFloat RecomposedLo = scalbn(Second, Exp, rmNearestTiesToEven);
+      if (RecomposedLo != Lo) {
+        // RoundingError tells us which direction we rounded:
+        //   - RoundingError > 0: we rounded up.
+        //   - RoundingError < 0: we rounded down.
+        const APFloat RoundingError = RecomposedLo - Lo;
+        // Determine if scalbn(Lo, -Exp) landed exactly on a midpoint.
+        // We do this by checking if the absolute rounding error is exactly
+        // half a ULP of the result.
+        const APFloat UlpOfSecond = harrisonUlp(Second);
+        const APFloat ScaledUlpOfSecond =
+            scalbn(UlpOfSecond, Exp - 1, rmNearestTiesToEven);
+        const bool IsMidpoint = abs(RoundingError) == ScaledUlpOfSecond;
+        const bool RoundedLoAway =
+            Second.isNegative() == RoundingError.isNegative();
+        // The signs of Hi and Lo disagree and we rounded Lo away: we must
+        // decrease the magnitude of Second to increase the magnitude of
+        // First+Second.
+        if (IsMidpoint && RoundedLoAway)
+          Second.next(/*nextDown=*/!Second.isNegative());
+      }
+    }
+    // Handle a tricky edge case where Arg is slightly less than a power of two
+    // (e.g., Arg = 2^k - epsilon). In this situation:
+    // 1. Hi is 2^k, and Lo is a small negative value -epsilon.
+    // 2. ilogb(Arg) correctly returns k-1.
+    // 3. Our initial Exp becomes (k-1) + 1 = k.
+    // 4. Scaling Hi (2^k) by 2^-k would yield a magnitude of 1.0 and
+    //    scaling Lo by 2^-k would yield zero. This would make the result 1.0
+    //    which is an invalid fraction, as the required interval is [0.5, 1.0).
+    // We detect this specific case by checking if Hi is a power of two and if
+    // the scaled Lo underflowed to zero. The fix: Increment Exp to k+1. This
+    // adjusts the scale factor, causing Hi to be scaled to 0.5, which is a
+    // valid fraction.
+    if (Second.isZero() && SignsDisagree && Hi.getExactLog2Abs() != INT_MIN)
+      ++Exp;
+  }
+
+  APFloat First = scalbn(Hi, -Exp, RM);
   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
 }
 

diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 3fcd7ecc2eb2c..29299cafcb014 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -2692,9 +2692,7 @@ static APFloat makeDoubleAPFloat(T Hi, U Lo) {
   return APFloat(APFloat::PPCDoubleDouble(), Bits);
 }
 
-static APFloat makeDoubleAPFloat(DD X) {
-  return makeDoubleAPFloat(X.Hi, X.Lo);
-}
+static APFloat makeDoubleAPFloat(DD X) { return makeDoubleAPFloat(X.Hi, X.Lo); }
 
 TEST(APFloatTest, PPCDoubleDouble) {
   APFloat test(APFloat::PPCDoubleDouble(), "1.0");
@@ -5804,8 +5802,8 @@ namespace PPCDoubleDoubleConvertToIntegerTestDetails {
 static constexpr auto RNE = APFloat::rmNearestTiesToEven;
 static constexpr auto RNA = APFloat::rmNearestTiesToAway;
 static constexpr auto RTZ = APFloat::rmTowardZero;
-static constexpr auto RTP = APFloat::rmTowardPositive;
-static constexpr auto RTN = APFloat::rmTowardNegative;
+static constexpr auto RUP = APFloat::rmTowardPositive;
+static constexpr auto RDN = APFloat::rmTowardNegative;
 
 struct TestCase {
   // Structure to hold the expected result of a conversion
@@ -5840,8 +5838,8 @@ struct TestCase {
     return with(RNE, ExpectedStr, Status)
         .with(RNA, ExpectedStr, Status)
         .with(RTZ, ExpectedStr, Status)
-        .with(RTP, ExpectedStr, Status)
-        .with(RTN, ExpectedStr, Status);
+        .with(RUP, ExpectedStr, Status)
+        .with(RDN, ExpectedStr, Status);
   }
 };
 
@@ -5894,24 +5892,24 @@ auto testCases() {
       // Input: 2.5 (Tie, preceding integer is Even)
       TestCase{{2.5, 0.0}, 32, true}
           .with(RTZ, "2", Inexact)
-          .with(RTN, "2", Inexact)
-          .with(RTP, "3", Inexact)
+          .with(RDN, "2", Inexact)
+          .with(RUP, "3", Inexact)
           .with(RNA, "3", Inexact)
           .with(RNE, "2", Inexact),
 
       // Input: 3.5 (Tie, preceding integer is Odd)
       TestCase{{3.5, 0.0}, 32, true}
           .with(RTZ, "3", Inexact)
-          .with(RTN, "3", Inexact)
-          .with(RTP, "4", Inexact)
+          .with(RDN, "3", Inexact)
+          .with(RUP, "4", Inexact)
           .with(RNA, "4", Inexact)
           .with(RNE, "4", Inexact),
 
       // Input: -2.5 (Tie, preceding integer is Even)
       TestCase{{-2.5, 0.0}, 32, true}
           .with(RTZ, "-2", Inexact)
-          .with(RTN, "-3", Inexact)
-          .with(RTP, "-2", Inexact)
+          .with(RDN, "-3", Inexact)
+          .with(RUP, "-2", Inexact)
           .with(RNA, "-3", Inexact)
           .with(RNE, "-2", Inexact),
 
@@ -5921,32 +5919,32 @@ auto testCases() {
       // Input: 2.5 + Epsilon (Slightly above tie)
       TestCase{{2.5, PM100}, 32, true}
           .with(RTZ, "2", Inexact)
-          .with(RTN, "2", Inexact)
-          .with(RTP, "3", Inexact)
+          .with(RDN, "2", Inexact)
+          .with(RUP, "3", Inexact)
           .with(RNA, "3", Inexact)
           .with(RNE, "3", Inexact),
 
       // Input: 2.5 - Epsilon (Slightly below tie)
       TestCase{{2.5, -PM100}, 32, true}
           .with(RTZ, "2", Inexact)
-          .with(RTN, "2", Inexact)
-          .with(RTP, "3", Inexact)
+          .with(RDN, "2", Inexact)
+          .with(RUP, "3", Inexact)
           .with(RNA, "2", Inexact)
           .with(RNE, "2", Inexact),
 
       // Input: 1.0 + Epsilon (Just above 1.0, e.g., 1.00...1)
       TestCase{{1.0, PM100}, 32, true}
           .with(RTZ, "1", Inexact)
-          .with(RTN, "1", Inexact)
-          .with(RTP, "2", Inexact)
+          .with(RDN, "1", Inexact)
+          .with(RUP, "2", Inexact)
           .with(RNA, "1", Inexact)
           .with(RNE, "1", Inexact),
 
       // Input: 1.0 - Epsilon (Just below 1.0, e.g. 0.999...)
       TestCase{{1.0, -PM100}, 32, true}
           .with(RTZ, "0", Inexact)
-          .with(RTN, "0", Inexact)
-          .with(RTP, "1", Inexact)
+          .with(RDN, "0", Inexact)
+          .with(RUP, "1", Inexact)
           .with(RNA, "1", Inexact)
           .with(RNE, "1", Inexact),
 
@@ -5958,8 +5956,8 @@ auto testCases() {
       // Value is 9007199254740993.5
       TestCase{{P53 + 2.0, -0.5}, 64, true}
           .with(RTZ, "9007199254740993", Inexact)
-          .with(RTN, "9007199254740993", Inexact)
-          .with(RTP, "9007199254740994", Inexact)
+          .with(RDN, "9007199254740993", Inexact)
+          .with(RUP, "9007199254740994", Inexact)
           .with(RNA, "9007199254740994", Inexact)
           .with(RNE, "9007199254740994", Inexact),
 
@@ -5973,18 +5971,18 @@ auto testCases() {
       // Represented as (2^63, -0.7)
       TestCase{{P63, -0.7}, 64, true}
           .with(RTZ, "9223372036854775807", Inexact)
-          .with(RTN, "9223372036854775807", Inexact)
+          .with(RDN, "9223372036854775807", Inexact)
           .with(RNA, "9223372036854775807", Inexact)
           .with(RNE, "9223372036854775807", Inexact)
-          .with(RTP, "9223372036854775807", Invalid),
+          .with(RUP, "9223372036854775807", Invalid),
 
       // Input: INT64_MAX + 0.5 (Tie at the boundary)
       // Represented as (2^63, -0.5). Target integers are MAX (odd) and 2^63
       // (even).
       TestCase{{P63, -0.5}, 64, true}
           .with(RTZ, "9223372036854775807", Inexact)
-          .with(RTN, "9223372036854775807", Inexact)
-          .with(RTP, "9223372036854775807", Invalid)
+          .with(RDN, "9223372036854775807", Inexact)
+          .with(RUP, "9223372036854775807", Invalid)
           .with(RNA, "9223372036854775807", Invalid)
           .with(RNE, "9223372036854775807", Invalid),
 
@@ -5992,8 +5990,8 @@ auto testCases() {
       // Represented as (2^55 - 2^2, 2^1 - 2^-1).
       TestCase{{0x1.fffffffffffffp+54, 0x1.8p0}, 56, true}
           .with(RTZ, "36028797018963965", Inexact)
-          .with(RTN, "36028797018963965", Inexact)
-          .with(RTP, "36028797018963966", Inexact)
+          .with(RDN, "36028797018963965", Inexact)
+          .with(RUP, "36028797018963966", Inexact)
           .with(RNA, "36028797018963966", Inexact)
           .with(RNE, "36028797018963966", Inexact),
 
@@ -6001,8 +5999,8 @@ auto testCases() {
       // Represented as (2^55 - 2^2, 2^1 - 2^-52).
       TestCase{{0x1.fffffffffffffp+54, 0x1.fffffffffffffp0}, 56, true}
           .with(RTZ, "36028797018963965", Inexact)
-          .with(RTN, "36028797018963965", Inexact)
-          .with(RTP, "36028797018963966", Inexact)
+          .with(RDN, "36028797018963965", Inexact)
+          .with(RUP, "36028797018963966", Inexact)
           .with(RNA, "36028797018963966", Inexact)
           .with(RNE, "36028797018963966", Inexact),
 
@@ -6016,9 +6014,9 @@ auto testCases() {
       // Target integers are -2^63-1 (odd) and MIN (even).
       TestCase{{-P63, -0.5}, 64, true}
           .with(RTZ, "-9223372036854775808", Inexact)
-          .with(RTP, "-9223372036854775808", Inexact)
-          // RTN rounds down, causing overflow.
-          .with(RTN, "-9223372036854775808", Invalid)
+          .with(RUP, "-9223372036854775808", Inexact)
+          // RDN rounds down, causing overflow.
+          .with(RDN, "-9223372036854775808", Invalid)
           // RNA rounds away (down), causing overflow.
           .with(RNA, "-9223372036854775808", Invalid)
           // RNE rounds to even (up to -2^63), which is OK.
@@ -6035,9 +6033,9 @@ auto testCases() {
       // Represented as (2^64, -0.5)
       TestCase{{P64, -0.5}, 64, false}
           .with(RTZ, "18446744073709551615", Inexact)
-          .with(RTN, "18446744073709551615", Inexact)
-          // RTP rounds up (2^64), causing overflow.
-          .with(RTP, "18446744073709551615", Invalid)
+          .with(RDN, "18446744073709551615", Inexact)
+          // RUP rounds up (2^64), causing overflow.
+          .with(RUP, "18446744073709551615", Invalid)
           // RNA rounds away (up), causing overflow.
           .with(RNA, "18446744073709551615", Invalid)
           // RNE rounds to even (up to 2^64), causing overflow.
@@ -6047,8 +6045,8 @@ auto testCases() {
       // Represented as (2^55 - 2^2, 2^1 - 2^-1).
       TestCase{{0x1.fffffffffffffp+54, 0x1.8p0}, 55, false}
           .with(RTZ, "36028797018963965", Inexact)
-          .with(RTN, "36028797018963965", Inexact)
-          .with(RTP, "36028797018963966", Inexact)
+          .with(RDN, "36028797018963965", Inexact)
+          .with(RUP, "36028797018963966", Inexact)
           .with(RNA, "36028797018963966", Inexact)
           .with(RNE, "36028797018963966", Inexact),
 
@@ -6056,26 +6054,26 @@ auto testCases() {
       // Represented as (2^55 - 2^2, 2^1 - 2^-52).
       TestCase{{0x1.fffffffffffffp+54, 0x1.fffffffffffffp0}, 55, false}
           .with(RTZ, "36028797018963965", Inexact)
-          .with(RTN, "36028797018963965", Inexact)
-          .with(RTP, "36028797018963966", Inexact)
+          .with(RDN, "36028797018963965", Inexact)
+          .with(RUP, "36028797018963966", Inexact)
           .with(RNA, "36028797018963966", Inexact)
           .with(RNE, "36028797018963966", Inexact),
 
       // Input: -0.3 (Slightly below zero)
       TestCase{{-0.3, 0.0}, 64, false}
           .with(RTZ, "0", Inexact)
-          .with(RTP, "0", Inexact)
+          .with(RUP, "0", Inexact)
           .with(RNA, "0", Inexact)
           .with(RNE, "0", Inexact)
-          .with(RTN, "0", Invalid),
+          .with(RDN, "0", Invalid),
 
       // Input: -0.5 (Tie at zero)
       TestCase{{-0.5, 0.0}, 64, false}
           .with(RTZ, "0", Inexact)
-          .with(RTP, "0", Inexact)
+          .with(RUP, "0", Inexact)
           // RNE rounds to even (0).
           .with(RNE, "0", Inexact)
-          .with(RTN, "0", Invalid)
+          .with(RDN, "0", Invalid)
           // RNA rounds away (-1), causing overflow.
           .with(RNA, "0", Invalid),
 
@@ -6098,10 +6096,10 @@ auto testCases() {
       // 7. Round to negative -0
       TestCase{{-PM100, 0.0}, 32, true}
           .with(RTZ, "0", Inexact)
-          .with(RTP, "0", Inexact)
+          .with(RUP, "0", Inexact)
           .with(RNA, "0", Inexact)
           .with(RNE, "0", Inexact)
-          .with(RTN, "-1", Inexact),
+          .with(RDN, "-1", Inexact),
   };
   return ConvertToIntegerTestCases;
 }
@@ -6347,19 +6345,329 @@ TEST(APFloatTest, PPCDoubleDoubleScalbn) {
   EXPECT_EQ(0x3cc8000000000000ull, Result.bitcastToAPInt().getRawData()[1]);
 }
 
-TEST(APFloatTest, PPCDoubleDoubleFrexp) {
-  // 3.0 + 3.0 << 53
-  uint64_t Input[] = {
-      0x4008000000000000ull, 0x3cb8000000000000ull,
+namespace PPCDoubleDoubleFrexpTestDetails {
+// Define the rounding modes for easier readability.
+static constexpr auto RNE = APFloat::rmNearestTiesToEven;
+static constexpr auto RNA = APFloat::rmNearestTiesToAway;
+static constexpr auto RTZ = APFloat::rmTowardZero;
+static constexpr auto RUP = APFloat::rmTowardPositive;
+static constexpr auto RDN = APFloat::rmTowardNegative;
+
+struct TestCase {
+  // Structure to hold the expected result of a frexp decomposition
+  struct ExpectedFractionExponent {
+    DD Fraction;
+    int Exponent;
+    friend APFloat::cmpResult compare(const ExpectedFractionExponent &Lhs,
+                                      const ExpectedFractionExponent &Rhs) {
+      const APFloat LhsFraction = makeDoubleAPFloat(Lhs.Fraction);
+      const APFloat RhsFraction = makeDoubleAPFloat(Rhs.Fraction);
+      const APFloat::cmpResult FractionRelation =
+          LhsFraction.compare(RhsFraction);
+      if (FractionRelation == APFloat::cmpUnordered)
+        return APFloat::cmpUnordered;
+      if (LhsFraction.isZero() && RhsFraction.isZero())
+        return APFloat::cmpEqual;
+      if (!LhsFraction.isNegative() &&
+          (RhsFraction.isNegative() || RhsFraction.isZero()))
+        return APFloat::cmpGreaterThan;
+      if (!RhsFraction.isNegative() &&
+          (LhsFraction.isNegative() || LhsFraction.isZero()))
+        return APFloat::cmpLessThan;
+      if (Lhs.Exponent > Rhs.Exponent)
+        return LhsFraction.isNegative() ? APFloat::cmpLessThan
+                                        : APFloat::cmpGreaterThan;
+      if (Lhs.Exponent < Rhs.Exponent)
+        return RhsFraction.isNegative() ? APFloat::cmpGreaterThan
+                                        : APFloat::cmpLessThan;
+      return FractionRelation;
+    }
   };
-  int Exp;
-  // 0.75 + 0.75 << 53
-  APFloat Result =
-      frexp(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Input)), Exp,
-            APFloat::rmNearestTiesToEven);
-  EXPECT_EQ(2, Exp);
-  EXPECT_EQ(0x3fe8000000000000ull, Result.bitcastToAPInt().getRawData()[0]);
-  EXPECT_EQ(0x3c98000000000000ull, Result.bitcastToAPInt().getRawData()[1]);
+
+  DD Input;
+  // Array indexed by the rounding mode enum value.
+  std::array<ExpectedFractionExponent, 5> Rounded = {};
+
+  // Helper to define the expected results for a specific rounding mode.
+  constexpr TestCase &with(APFloat::roundingMode RM, DD ExpectedDD,
+                           int ExpectedExponent) {
+    Rounded[static_cast<std::underlying_type_t<APFloat::roundingMode>>(RM)] = {
+        ExpectedDD,
+        ExpectedExponent,
+    };
+    return *this;
+  }
+
+  // Helper to define the same result for all rounding modes.
+  constexpr TestCase &withAll(DD ExpectedDD, int ExpectedExponent) {
+    return with(RNE, ExpectedDD, ExpectedExponent)
+        .with(RNA, ExpectedDD, ExpectedExponent)
+        .with(RTZ, ExpectedDD, ExpectedExponent)
+        .with(RUP, ExpectedDD, ExpectedExponent)
+        .with(RDN, ExpectedDD, ExpectedExponent);
+  }
+};
+
+auto testCases() {
+  static constexpr auto FrexpTestCases = std::array{
+      // Input: +infinity
+      TestCase{{std::numeric_limits<double>::infinity(), 0.0}}.withAll(
+          {std::numeric_limits<double>::infinity(), 0.0}, INT_MAX),
+
+      // Input: -infinity
+      TestCase{{-std::numeric_limits<double>::infinity(), 0.0}}.withAll(
+          {-std::numeric_limits<double>::infinity(), 0.0}, INT_MAX),
+
+      // Input: NaN
+      TestCase{{std::numeric_limits<double>::quiet_NaN(), 0.0}}.withAll(
+          {std::numeric_limits<double>::quiet_NaN(), 0.0}, INT_MIN),
+
+      // Input: 2^-1074
+      TestCase{{0x1p-1074, 0.0}}.withAll({0x1p-1, 0.0}, -1073),
+      TestCase{{-0x1p-1074, 0.0}}.withAll({-0x1p-1, 0.0}, -1073),
+
+      // Input: (2^1, -2^-1073 + -2^-1074)
+      TestCase{{0x1p1, -0x1.8p-1073}}
+          .withAll({0x1p0, -0x1p-1073}, 1)
+          .with(RNA, {0x1p0, -0x1p-1074}, 1)
+          .with(RUP, {0x1p0, -0x1p-1074}, 1),
+      TestCase{{-0x1p1, 0x1.8p-1073}}
+          .withAll({-0x1p0, 0x1p-1073}, 1)
+          .with(RNA, {-0x1p0, 0x1p-1074}, 1)
+          .with(RDN, {-0x1p0, 0x1p-1074}, 1),
+
+      // Input: (2^1, -2^-1073)
+      TestCase{{0x1p1, -0x1p-1073}}.withAll({0x1p0, -0x1p-1074}, 1),
+
+      // Input: (2^1, -2^-1074)
+      TestCase{{0x1p1, -0x1p-1074}}
+          .withAll({0x1p-1, -0.0}, 2)
+          .with(RDN, {0x1p0, -0x1p-1074}, 1)
+          .with(RTZ, {0x1p0, -0x1p-1074}, 1),
+
+      // Input: (2^2, -2^-1072 + -2^-1073 + -2^-1074)
+      TestCase{{0x1p2, -0x1.cp-1072}}
+          .withAll({0x1p0, -0x1p-1073}, 2)
+          .with(RUP, {0x1p0, -0x1p-1074}, 2),
+
+      // Input: (2^2, -2^-1072 + -2^-1073)
+      TestCase{{0x1p2, -0x1.8p-1072}}
+          .withAll({0x1p0, -0x1p-1073}, 2)
+          .with(RNA, {0x1p0, -0x1p-1074}, 2)
+          .with(RUP, {0x1p0, -0x1p-1074}, 2),
+      TestCase{{-0x1p2, 0x1.8p-1072}}
+          .withAll({-0x1p0, 0x1p-1073}, 2)
+          .with(RNA, {-0x1p0, 0x1p-1074}, 2)
+          .with(RDN, {-0x1p0, 0x1p-1074}, 2),
+
+      // Input: (2^2, -2^-1072 + -2^-1074)
+      TestCase{{0x1p2, -0x1.4cp-1072}}
+          .withAll({0x1p0, -0x1p-1074}, 2)
+          .with(RDN, {0x1p0, -0x1p-1073}, 2)
+          .with(RTZ, {0x1p0, -0x1p-1073}, 2),
+
+      // Input: (2^2, -2^-1072)
+      TestCase{{0x1p2, -0x1p-1072}}.withAll({0x1p0, -0x1p-1074}, 2),
+
+      // Input: (2^2, -2^-1073 + -2^-1074)
+      TestCase{{0x1p2, -0x1.8p-1073}}
+          .withAll({0x1p0, -0x1p-1074}, 2)
+          .with(RUP, {0x1p-1, -0.0}, 3),
+
+      // Input: (2^2, -2^-1073)
+      TestCase{{0x1p2, -0x1p-1073}}
+          .withAll({0x1p-1, -0.0}, 3)
+          .with(RDN, {0x1p0, -0x1p-1074}, 2)
+          .with(RTZ, {0x1p0, -0x1p-1074}, 2),
+
+      // Input: (2^2, -2^-1074)
+      TestCase{{0x1p2, -0x1p-1074}}
+          .withAll({0x1p-1, -0.0}, 3)
+          .with(RDN, {0x1p0, -0x1p-1074}, 2)
+          .with(RTZ, {0x1p0, -0x1p-1074}, 2),
+
+      // Input: 3+3*2^-53 canonicalized to (3+2^-51, -2^-53)
+      // Output: 0.75+0.75*2^-53 canonicalized to (.75+2^-53, -2^-55)
+      TestCase{{0x1.8000000000001p1, -0x1p-53}}.withAll(
+          {0x1.8000000000001p-1, -0x1p-55}, 2),
+      TestCase{{-0x1.8000000000001p1, 0x1p-53}}.withAll(
+          {-0x1.8000000000001p-1, 0x1p-55}, 2),
+
+      // Input: (2^1021+2^969, 2^968-2^915)
+      TestCase{{0x1.0000000000001p1021, 0x1.fffffffffffffp967}}.withAll(
+          {0x1.0000000000001p-1, 0x1.fffffffffffffp-55}, 1022),
+      TestCase{{-0x1.0000000000001p1021, -0x1.fffffffffffffp967}}.withAll(
+          {-0x1.0000000000001p-1, -0x1.fffffffffffffp-55}, 1022),
+
+      // Input: (2^1023, -2^-1)
+      TestCase{{0x1p+1023, -0x1p-1}}.withAll({0x1p0, -0x1p-1024}, 1023),
+      TestCase{{-0x1p+1023, 0x1p-1}}.withAll({-0x1p0, 0x1p-1024}, 1023),
+
+      // Input: (2^1023, -2^-51)
+      TestCase{{0x1p+1023, -0x1p-51}}.withAll({0x1p0, -0x1p-1074}, 1023),
+      TestCase{{-0x1p+1023, 0x1p-51}}.withAll({-0x1p0, 0x1p-1074}, 1023),
+
+      // Input: (2^1023, -2^-52)
+      TestCase{{0x1p+1023, -0x1p-52}}
+          .withAll({0x1p-1, -0x0p0}, 1024)
+          .with(RDN, {0x1p0, -0x1p-1074}, 1023)
+          .with(RTZ, {0x1p0, -0x1p-1074}, 1023),
+      TestCase{{-0x1p+1023, 0x1p-52}}
+          .withAll({-0x1p-1, 0x0p0}, 1024)
+          .with(RUP, {-0x1p0, 0x1p-1074}, 1023)
+          .with(RTZ, {-0x1p0, 0x1p-1074}, 1023),
+
+      // Input: (2^1023, 2^-1074)
+      TestCase{{0x1p+1023, 0x1p-1074}}
+          .withAll({0x1p-1, 0x0p+0}, 1024)
+          .with(RUP, {0x1p-1, 0x1p-1074}, 1024),
+      TestCase{{-0x1p+1023, -0x1p-1074}}
+          .withAll({-0x1p-1, -0x0p+0}, 1024)
+          .with(RDN, {-0x1p-1, -0x1p-1074}, 1024),
+
+      // Input: (2^1024-2^971, 2^970-2^918)
+      TestCase{{0x1.fffffffffffffp+1023, 0x1.ffffffffffffep+969}}.withAll(
+          {0x1.fffffffffffffp-1, 0x1.ffffffffffffep-55}, 1024),
+      TestCase{{-0x1.fffffffffffffp+1023, -0x1.ffffffffffffep+969}}.withAll(
+          {-0x1.fffffffffffffp-1, -0x1.ffffffffffffep-55}, 1024),
+  };
+  return FrexpTestCases;
+}
+} // namespace PPCDoubleDoubleFrexpTestDetails
+
+class PPCDoubleDoubleFrexpValueTest
+    : public testing::Test,
+      public ::testing::WithParamInterface<
+          PPCDoubleDoubleFrexpTestDetails::TestCase> {};
+
+INSTANTIATE_TEST_SUITE_P(
+    PPCDoubleDoubleFrexpValueParamTests, PPCDoubleDoubleFrexpValueTest,
+    ::testing::ValuesIn(PPCDoubleDoubleFrexpTestDetails::testCases()));
+
+TEST_P(PPCDoubleDoubleFrexpValueTest, PPCDoubleDoubleFrexp) {
+  const PPCDoubleDoubleFrexpTestDetails::TestCase Params = GetParam();
+  const APFloat Input = makeDoubleAPFloat(Params.Input);
+  auto RmToIdx = [](APFloat::roundingMode RM) {
+    return static_cast<std::underlying_type_t<APFloat::roundingMode>>(RM);
+  };
+  // First, make sure our expected results are consistent with each other before
+  // bothering to test the implementation.
+  if (Input.isFinite()) {
+    // Make sure the input is canonical.
+    EXPECT_EQ(APFloat{Params.Input.Hi},
+              APFloat{Params.Input.Hi} + APFloat{Params.Input.Lo})
+        << Params.Input.Hi << " + " << Params.Input.Lo;
+
+    const auto Dn = Params.Rounded[RmToIdx(APFloat::rmTowardNegative)];
+    const auto Up = Params.Rounded[RmToIdx(APFloat::rmTowardPositive)];
+    const auto Tz = Params.Rounded[RmToIdx(APFloat::rmTowardZero)];
+    const auto Ne = Params.Rounded[RmToIdx(APFloat::rmNearestTiesToEven)];
+    const auto Na = Params.Rounded[RmToIdx(APFloat::rmNearestTiesToAway)];
+
+    // The rdn result must be no larger than the rup result.
+    const APFloat::cmpResult DnVsUp = compare(Dn, Up);
+    EXPECT_TRUE(DnVsUp == APFloat::cmpLessThan || DnVsUp == APFloat::cmpEqual);
+
+    for (size_t I = 0, E = std::size(Params.Rounded); I != E; ++I) {
+      const APFloat RoundedFraction =
+          makeDoubleAPFloat(Params.Rounded[I].Fraction);
+      // All possible results should be bracketed by [Dn, Up].
+      const APFloat::cmpResult VsDn = compare(Params.Rounded[I], Dn);
+      EXPECT_TRUE(VsDn == APFloat::cmpGreaterThan || VsDn == APFloat::cmpEqual);
+      const APFloat::cmpResult VsUp = compare(Params.Rounded[I], Up);
+      EXPECT_TRUE(VsUp == APFloat::cmpLessThan || VsUp == APFloat::cmpEqual);
+      // A rounding result is either equal to the rup or rdn result.
+      EXPECT_TRUE(VsUp == APFloat::cmpEqual || VsDn == APFloat::cmpEqual);
+      // frexp returns a result whose magnitude is in [.5, 1) so its exponent
+      // should be -1.
+      if (!RoundedFraction.isZero())
+        EXPECT_EQ(ilogb(RoundedFraction), -1)
+            << static_cast<APFloat::roundingMode>(I);
+      // Decomposition preserves sign.
+      EXPECT_EQ(RoundedFraction.isNegative(), Input.isNegative());
+      // A rounding result must be canonical.
+      EXPECT_EQ(APFloat{Params.Rounded[I].Fraction.Hi},
+                APFloat{Params.Rounded[I].Fraction.Hi} +
+                    APFloat{Params.Rounded[I].Fraction.Lo})
+          << Params.Rounded[I].Fraction.Hi << " + "
+          << Params.Rounded[I].Fraction.Lo;
+    }
+
+    // The rtz result must be either rup or rdn depending on the sign.
+    if (Input.isNegative()) {
+      const APFloat::cmpResult TzVsUp = compare(Tz, Up);
+      EXPECT_EQ(TzVsUp, APFloat::cmpEqual);
+    } else {
+      const APFloat::cmpResult TzVsDn = compare(Tz, Dn);
+      EXPECT_EQ(TzVsDn, APFloat::cmpEqual);
+    }
+
+    // The recomposed up should be at least as big as the input.
+    const APFloat RecomposedUp =
+        scalbn(makeDoubleAPFloat(Up.Fraction), Up.Exponent,
+               APFloat::rmNearestTiesToEven);
+    EXPECT_TRUE(RecomposedUp >= Input);
+    // The recomposed down can't be larger than the input.
+    const APFloat RecomposedDn =
+        scalbn(makeDoubleAPFloat(Dn.Fraction), Dn.Exponent,
+               APFloat::rmNearestTiesToEven);
+    EXPECT_TRUE(RecomposedDn <= Input);
+    // The recomposed tz must not have a larger magnitude than the input.
+    const APFloat RecomposedTz =
+        scalbn(makeDoubleAPFloat(Tz.Fraction), Tz.Exponent,
+               APFloat::rmNearestTiesToEven);
+    EXPECT_TRUE(abs(RecomposedTz) <= abs(Input));
+    // Either both or neither of the recomposed round-to-nearest results are
+    // equal to the input.
+    const APFloat RecomposedNe =
+        scalbn(makeDoubleAPFloat(Ne.Fraction), Ne.Exponent,
+               APFloat::rmNearestTiesToEven);
+    const APFloat RecomposedNa =
+        scalbn(makeDoubleAPFloat(Na.Fraction), Na.Exponent,
+               APFloat::rmNearestTiesToEven);
+    EXPECT_EQ(RecomposedNe == Input, RecomposedNa == Input);
+    // Either the ne result equals the na result or the na result has a bigger
+    // magnitude.
+    const APFloat::cmpResult NeVsNa =
+        abs(RecomposedNe).compare(abs(RecomposedNa));
+    EXPECT_TRUE(NeVsNa == APFloat::cmpLessThan || NeVsNa == APFloat::cmpEqual);
+    // ne and na may only disagree if they broke a tie differently.
+    if (NeVsNa == APFloat::cmpLessThan) {
+      // ne's magnitude should be lower than input.
+      const APFloat::cmpResult NeVsInput =
+          abs(RecomposedNe).compare(abs(Input));
+      EXPECT_EQ(NeVsInput, APFloat::cmpLessThan);
+      // na's magnitude should be greater than input.
+      const APFloat::cmpResult NaVsInput =
+          abs(RecomposedNa).compare(abs(Input));
+      EXPECT_EQ(NaVsInput, APFloat::cmpGreaterThan);
+    }
+    // If up or down perfectly reconstructs the input, the round-to-nearest
+    // results should too.
+    if (RecomposedUp == Input || RecomposedDn == Input) {
+      EXPECT_EQ(RecomposedNe, Input);
+      EXPECT_EQ(RecomposedNa, Input);
+    }
+  }
+
+  for (size_t I = 0, E = std::size(Params.Rounded); I != E; ++I) {
+    const auto RM = static_cast<APFloat::roundingMode>(I);
+    const auto &Expected = Params.Rounded[I];
+    const APFloat ExpectedFraction = makeDoubleAPFloat(Expected.Fraction);
+
+    int ActualExponent;
+    const APFloat ActualFraction = frexp(Input, ActualExponent, RM);
+    if (ExpectedFraction.isNaN()) {
+      EXPECT_TRUE(ActualFraction.isNaN());
+    } else {
+      EXPECT_EQ(ActualFraction.compare(ExpectedFraction), APFloat::cmpEqual)
+          << ActualFraction << " vs " << ExpectedFraction << " for input "
+          << Params.Input.Hi << " + " << Params.Input.Lo << " RM " << RM;
+    }
+    EXPECT_EQ(ActualExponent, Expected.Exponent)
+        << "for input " << Params.Input.Hi << " + " << Params.Input.Lo
+        << " RM " << RM;
+  }
 }
 
 TEST(APFloatTest, PPCDoubleDoubleNext) {


        


More information about the llvm-commits mailing list