[llvm] 45f3263 - [APFloat] Properly implement DoubleAPFloat::convertFromAPInt

David Majnemer via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 24 15:48:17 PDT 2025


Author: David Majnemer
Date: 2025-08-24T15:47:54-07:00
New Revision: 45f3263c6f93cc07faedb9b3ca2c56349f55e6e0

URL: https://github.com/llvm/llvm-project/commit/45f3263c6f93cc07faedb9b3ca2c56349f55e6e0
DIFF: https://github.com/llvm/llvm-project/commit/45f3263c6f93cc07faedb9b3ca2c56349f55e6e0.diff

LOG: [APFloat] Properly implement DoubleAPFloat::convertFromAPInt

The old implementation converted to the legacy semantics, inducing
rounding and not properly handling inputs like (2^1000 + 2^200) which
have more precision than the legacy semantics can represent.

Instead, we convert the integer into two floats and an error.  The error
is used to implement the rounding behavior.

Remove related dead, untested code: convertFrom*ExtendedInteger

Added: 
    

Modified: 
    llvm/include/llvm/ADT/APFloat.h
    llvm/lib/Support/APFloat.cpp
    llvm/unittests/ADT/APFloatTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index b5429db382e2c..6bc62845fddf4 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -460,12 +460,6 @@ class IEEEFloat final {
   LLVM_ABI opStatus convertToInteger(MutableArrayRef<integerPart>, unsigned int,
                                      bool, roundingMode, bool *) const;
   LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode);
-  LLVM_ABI opStatus convertFromSignExtendedInteger(const integerPart *,
-                                                   unsigned int, bool,
-                                                   roundingMode);
-  LLVM_ABI opStatus convertFromZeroExtendedInteger(const integerPart *,
-                                                   unsigned int, bool,
-                                                   roundingMode);
   LLVM_ABI Expected<opStatus> convertFromString(StringRef, roundingMode);
   LLVM_ABI APInt bitcastToAPInt() const;
   LLVM_ABI double convertToDouble() const;
@@ -805,6 +799,16 @@ class DoubleAPFloat final {
                                         unsigned int Width, bool IsSigned,
                                         roundingMode RM, bool *IsExact) const;
 
+  // Convert an unsigned integer Src to a floating point number,
+  // rounding according to RM.  The sign of the floating point number is not
+  // modified.
+  opStatus convertFromUnsignedParts(const integerPart *Src,
+                                    unsigned int SrcCount, roundingMode RM);
+
+  // Handle overflow.  Sign is preserved.  We either become infinity or
+  // the largest finite number.
+  opStatus handleOverflow(roundingMode RM);
+
 public:
   LLVM_ABI DoubleAPFloat(const fltSemantics &S);
   LLVM_ABI DoubleAPFloat(const fltSemantics &S, uninitializedTag);
@@ -860,14 +864,6 @@ class DoubleAPFloat final {
                                      roundingMode RM, bool *IsExact) const;
   LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned,
                                      roundingMode RM);
-  LLVM_ABI opStatus convertFromSignExtendedInteger(const integerPart *Input,
-                                                   unsigned int InputSize,
-                                                   bool IsSigned,
-                                                   roundingMode RM);
-  LLVM_ABI opStatus convertFromZeroExtendedInteger(const integerPart *Input,
-                                                   unsigned int InputSize,
-                                                   bool IsSigned,
-                                                   roundingMode RM);
   LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits,
                                            bool UpperCase,
                                            roundingMode RM) const;
@@ -1344,22 +1340,15 @@ class APFloat : public APFloatBase {
   // the precision of the conversion.
   LLVM_ABI opStatus convertToInteger(APSInt &Result, roundingMode RM,
                                      bool *IsExact) const;
+
+  // Convert a two's complement integer Input to a floating point number,
+  // rounding according to RM.  IsSigned is true if the integer is signed,
+  // in which case it must be sign-extended.
   opStatus convertFromAPInt(const APInt &Input, bool IsSigned,
                             roundingMode RM) {
     APFLOAT_DISPATCH_ON_SEMANTICS(convertFromAPInt(Input, IsSigned, RM));
   }
-  opStatus convertFromSignExtendedInteger(const integerPart *Input,
-                                          unsigned int InputSize, bool IsSigned,
-                                          roundingMode RM) {
-    APFLOAT_DISPATCH_ON_SEMANTICS(
-        convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM));
-  }
-  opStatus convertFromZeroExtendedInteger(const integerPart *Input,
-                                          unsigned int InputSize, bool IsSigned,
-                                          roundingMode RM) {
-    APFLOAT_DISPATCH_ON_SEMANTICS(
-        convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM));
-  }
+
   LLVM_ABI Expected<opStatus> convertFromString(StringRef, roundingMode);
   APInt bitcastToAPInt() const {
     APFLOAT_DISPATCH_ON_SEMANTICS(bitcastToAPInt());

diff  --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index aa5b3c78ea5f5..d14abb4bd05b5 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -2927,51 +2927,6 @@ APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
 }
 
-/* Convert a two's complement integer SRC to a floating point number,
-   rounding according to ROUNDING_MODE.  ISSIGNED is true if the
-   integer is signed, in which case it must be sign-extended.  */
-APFloat::opStatus
-IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
-                                          unsigned int srcCount, bool isSigned,
-                                          roundingMode rounding_mode) {
-  opStatus status;
-
-  if (isSigned &&
-      APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
-    integerPart *copy;
-
-    /* If we're signed and negative negate a copy.  */
-    sign = true;
-    copy = new integerPart[srcCount];
-    APInt::tcAssign(copy, src, srcCount);
-    APInt::tcNegate(copy, srcCount);
-    status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
-    delete [] copy;
-  } else {
-    sign = false;
-    status = convertFromUnsignedParts(src, srcCount, rounding_mode);
-  }
-
-  return status;
-}
-
-/* FIXME: should this just take a const APInt reference?  */
-APFloat::opStatus
-IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
-                                          unsigned int width, bool isSigned,
-                                          roundingMode rounding_mode) {
-  unsigned int partCount = partCountForBits(width);
-  APInt api = APInt(width, ArrayRef(parts, partCount));
-
-  sign = false;
-  if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
-    sign = true;
-    api = -api;
-  }
-
-  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
-}
-
 Expected<APFloat::opStatus>
 IEEEFloat::convertFromHexadecimalString(StringRef s,
                                         roundingMode rounding_mode) {
@@ -5648,36 +5603,158 @@ DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
   return FS;
 }
 
-APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
-                                                  bool IsSigned,
-                                                  roundingMode RM) {
-  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
-  APFloat Tmp(semPPCDoubleDoubleLegacy);
-  auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
-  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
-  return Ret;
-}
+APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
+  switch (RM) {
+  case APFloat::rmTowardZero:
+    makeLargest(/*Neg=*/isNegative());
+    break;
+  case APFloat::rmTowardNegative:
+    if (isNegative())
+      makeInf(/*Neg=*/true);
+    else
+      makeLargest(/*Neg=*/false);
+    break;
+  case APFloat::rmTowardPositive:
+    if (isNegative())
+      makeLargest(/*Neg=*/true);
+    else
+      makeInf(/*Neg=*/false);
+    break;
+  case APFloat::rmNearestTiesToAway:
+  case APFloat::rmNearestTiesToEven:
+    makeInf(/*Neg=*/isNegative());
+    break;
+  default:
+    llvm_unreachable("Invalid rounding mode found");
+  }
+  opStatus S = opInexact;
+  if (!getFirst().isFinite())
+    S = static_cast<opStatus>(S | opOverflow);
+  return S;
+}
+
+APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
+    const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
+  // Find the most significant bit of the source integer. APInt::tcMSB returns
+  // UINT_MAX for a zero value.
+  const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
+  if (SrcMSB == UINT_MAX) {
+    // The source integer is 0.
+    makeZero(/*Neg=*/false);
+    return opOK;
+  }
 
-APFloat::opStatus
-DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
-                                              unsigned int InputSize,
-                                              bool IsSigned, roundingMode RM) {
-  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
-  APFloat Tmp(semPPCDoubleDoubleLegacy);
-  auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
-  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
-  return Ret;
+  // Create a minimally-sized APInt to represent the source value.
+  const unsigned SrcBitWidth = SrcMSB + 1;
+  APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth,
+                      /*numWords=*/SrcCount, Src},
+                /*isUnsigned=*/true};
+
+  // Stage 1: Initial Approximation.
+  // Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
+  // We use round-to-nearest because it minimizes the initial error, which is
+  // crucial for the subsequent steps.
+  APFloat Hi{getFirst().getSemantics()};
+  Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
+
+  // If the first approximation already overflows, the number is too large.
+  // NOTE: The underlying semantics are *more* conservative when choosing to
+  // overflow because their notion of ULP is much larger. As such, it is always
+  // safe to overflow at the DoubleAPFloat level if the APFloat overflows.
+  if (!Hi.isFinite())
+    return handleOverflow(RM);
+
+  // Stage 2: Exact Error Calculation.
+  // Calculate the exact error of the first approximation: Error = SrcInt - Hi.
+  // This is done by converting Hi back to an integer and subtracting it from
+  // the original source.
+  bool HiAsIntIsExact;
+  // Create an integer representation of Hi. Its width is determined by the
+  // exponent of Hi, ensuring it's just large enough. This width can exceed
+  // SrcBitWidth if the conversion to Hi rounded up to a power of two, so that
+  // Hi is represented accurately when converted back to an integer.
+  APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
+  Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
+  const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
+
+  // Stage 3: Error Approximation and Rounding.
+  // Convert the integer error into the Lo part of the DoubleAPFloat. This step
+  // captures the remainder of the original number. The rounding mode for this
+  // conversion (LoRM) may need to be adjusted from the user-requested RM to
+  // ensure the final sum (Hi + Lo) rounds correctly.
+  roundingMode LoRM = RM;
+  // Adjustments are only necessary when the initial approximation Hi was an
+  // overestimate, making the Error negative.
+  if (Error.isNegative()) {
+    if (RM == rmNearestTiesToAway) {
+      // For rmNearestTiesToAway, a tie should round away from zero. Since
+      // SrcInt is positive, this means rounding toward +infinity.
+      // A standard conversion of a negative Error would round ties toward
+      // -infinity, causing the final sum Hi + Lo to be smaller. To
+      // counteract this, we detect the tie case and override the rounding
+      // mode for Lo to rmTowardPositive.
+      const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
+      const unsigned LoPrecision = getSecond().getSemantics().precision;
+      if (ErrorActiveBits > LoPrecision) {
+        const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
+        // A tie occurs when the bits to be truncated are of the form 100...0.
+        // This is detected by checking if the number of trailing zeros is
+        // exactly one less than the number of bits being truncated.
+        if (Error.countTrailingZeros() == RoundingBoundary - 1)
+          LoRM = rmTowardPositive;
+      }
+    } else if (RM == rmTowardZero) {
+      // For rmTowardZero, the final positive result must be truncated (rounded
+      // down). When Hi is an overestimate, Error is negative. A standard
+      // rmTowardZero conversion of Error would make it *less* negative,
+      // effectively rounding the final sum Hi + Lo *up*. To ensure the sum
+      // rounds down correctly, we force Lo to round toward -infinity.
+      LoRM = rmTowardNegative;
+    }
+  }
+
+  APFloat Lo{getSecond().getSemantics()};
+  opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
+
+  // Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
+  // components do not overlap. fastTwoSum performs this operation.
+  std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
+  Floats[0] = std::move(Hi);
+  Floats[1] = std::move(Lo);
+
+  // A final check for overflow is needed because fastTwoSum can cause a
+  // carry-out from Lo that pushes Hi to infinity.
+  if (!getFirst().isFinite())
+    return handleOverflow(RM);
+
+  // The largest DoubleAPFloat must be canonical. Values which are larger are
+  // not canonical and are equivalent to overflow.
+  if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
+    DoubleAPFloat Largest{*Semantics};
+    Largest.makeLargest(/*Neg=*/false);
+    if (compare(Largest) == APFloat::cmpGreaterThan)
+      return handleOverflow(RM);
+  }
+
+  // The final status of the operation is determined by the conversion of the
+  // error term. If Lo could represent Error exactly, the entire conversion
+  // is exact. Otherwise, it's inexact.
+  return Status;
 }
 
-APFloat::opStatus
-DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
-                                              unsigned int InputSize,
-                                              bool IsSigned, roundingMode RM) {
-  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
-  APFloat Tmp(semPPCDoubleDoubleLegacy);
-  auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
-  *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
-  return Ret;
+APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
+                                                  bool IsSigned,
+                                                  roundingMode RM) {
+  const bool NegateInput = IsSigned && Input.isNegative();
+  APInt API = Input;
+  if (NegateInput)
+    API.negate();
+
+  const APFloat::opStatus Status =
+      convertFromUnsignedParts(API.getRawData(), API.getNumWords(), RM);
+  if (NegateInput)
+    changeSign();
+  return Status;
 }
 
 unsigned int DoubleAPFloat::convertToHexString(char *DST,

diff  --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index b0435cc21da58..141282ea254b4 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1660,6 +1660,90 @@ TEST(APFloatTest, toInteger) {
   EXPECT_EQ(APSInt::getMaxValue(5, false), result);
 }
 
+class APFloatConvertFromAPIntParamTest
+    : public ::testing::TestWithParam<const fltSemantics *> {
+protected:
+  // Helper to run a conversion and compare the integer result directly.
+  static void testConversionAndCompareInt(const APInt &InputValue,
+                                          const bool IsSigned,
+                                          APFloat::roundingMode RM,
+                                          const APInt &ExpectedIntValue) {
+    const fltSemantics &Sem = *GetParam();
+    APFloat F(Sem);
+    F.convertFromAPInt(InputValue, /*IsSigned=*/IsSigned, RM);
+
+    APSInt ResultInt(InputValue.getBitWidth(), /*isUnsigned=*/!IsSigned);
+    bool IsExact;
+    F.convertToInteger(ResultInt, APFloat::rmTowardZero, &IsExact);
+
+    EXPECT_TRUE(IsExact);
+    EXPECT_TRUE(ResultInt.eq(ExpectedIntValue))
+        << "InputValue: " << InputValue << "\n"
+        << ResultInt << " vs " << ExpectedIntValue << "\n";
+  }
+};
+
+TEST_P(APFloatConvertFromAPIntParamTest, HalfwayRounding) {
+  const fltSemantics &Sem = *GetParam();
+  const unsigned Precision = APFloat::semanticsPrecision(Sem);
+
+  if (Precision == 0)
+    GTEST_SKIP() << "Skipping test for semantics with no significand.";
+
+  for (bool IsSigned : {false, true}) {
+    const unsigned BitWidth = Precision + 1 + (IsSigned ? 1 : 0);
+
+    const APInt RoundedDownVal = APInt::getOneBitSet(BitWidth, Precision);
+    const APInt HalfwayVal = RoundedDownVal + 1;
+    const APInt RoundedUpVal = RoundedDownVal + 2;
+
+    testConversionAndCompareInt(HalfwayVal, IsSigned,
+                                APFloat::rmNearestTiesToEven, RoundedDownVal);
+    testConversionAndCompareInt(HalfwayVal, IsSigned,
+                                APFloat::rmNearestTiesToAway, RoundedUpVal);
+    testConversionAndCompareInt(HalfwayVal, IsSigned, APFloat::rmTowardPositive,
+                                RoundedUpVal);
+    testConversionAndCompareInt(HalfwayVal, IsSigned, APFloat::rmTowardNegative,
+                                RoundedDownVal);
+    testConversionAndCompareInt(HalfwayVal, IsSigned, APFloat::rmTowardZero,
+                                RoundedDownVal);
+  }
+}
+
+TEST_P(APFloatConvertFromAPIntParamTest, MaxMagnitude) {
+  const fltSemantics &Sem = *GetParam();
+  const unsigned Precision = APFloat::semanticsPrecision(Sem);
+
+  if (Precision == 0)
+    GTEST_SKIP() << "Skipping test for semantics with no significand.";
+
+  const APFloat Largest = APFloat::getLargest(Sem, /*Negative=*/false);
+  const int Exp = ilogb(Largest);
+  for (bool IsSigned : {false, true}) {
+    const unsigned BitWidth = Exp + 1 + (IsSigned ? 1 : 0);
+
+    bool IsExact;
+    APSInt LargestAsInt{BitWidth, /*IsUnsigned=*/!IsSigned};
+    const APFloat::opStatus ToIntStatus =
+        Largest.convertToInteger(LargestAsInt, APFloat::rmTowardZero, &IsExact);
+    EXPECT_EQ(ToIntStatus, APFloat::opOK);
+
+    for (const APFloat::roundingMode RM :
+         {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+          APFloat::rmTowardPositive, APFloat::rmTowardZero,
+          APFloat::rmNearestTiesToEven}) {
+      testConversionAndCompareInt(LargestAsInt, IsSigned, RM, LargestAsInt);
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(IEEESemantics, APFloatConvertFromAPIntParamTest,
+                         ::testing::Values(&APFloat::IEEEhalf(),
+                                           &APFloat::BFloat(),
+                                           &APFloat::IEEEsingle(),
+                                           &APFloat::IEEEdouble(),
+                                           &APFloat::IEEEquad()));
+
 static APInt nanbitsFromAPInt(const fltSemantics &Sem, bool SNaN, bool Negative,
                               uint64_t payload) {
   APInt appayload(64, payload);
@@ -7039,6 +7123,249 @@ TEST(APFloatTest, PPCDoubleDoubleNext) {
   EXPECT_FALSE(Test.isDenormal());
 }
 
+TEST(APFloatTest, PPCDoubleDoubleConvertFromAPIntInexact) {
+  // Create an integer which would not be exactly representable in
+  // PPCDoubleDoubleLegacy.
+  for (bool IsSigned : {false, true}) {
+    const unsigned BitWidth =
+        APFloat::semanticsPrecision(APFloat::IEEEdouble()) * 3 +
+        (IsSigned ? 1 : 0);
+
+    for (bool Negative :
+         IsSigned ? std::vector{false, true} : std::vector{false}) {
+      APInt Huge = APInt{BitWidth, 0};
+      // Set the highest bit without making Huge negative.
+      Huge.setBit(BitWidth - (IsSigned ? 2 : 1));
+      // Set the low bit.
+      Huge.setBit(0);
+      if (Negative)
+        Huge.negate();
+      for (const APFloat::roundingMode RM :
+           {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+            APFloat::rmTowardPositive, APFloat::rmTowardZero,
+            APFloat::rmNearestTiesToEven}) {
+        APFloat F{APFloat::PPCDoubleDouble()};
+        const APFloat::opStatus ConvertFromStatus =
+            F.convertFromAPInt(Huge, /*IsSigned=*/IsSigned, RM);
+        EXPECT_EQ(ConvertFromStatus, APFloat::opOK);
+
+        bool IsExact;
+        APSInt ResultInt{Huge.getBitWidth(), /*isUnsigned=*/!IsSigned};
+        const APFloat::opStatus ConvertToStatus =
+            F.convertToInteger(ResultInt, APFloat::rmTowardZero, &IsExact);
+
+        EXPECT_TRUE(IsExact) << "RM: " << RM;
+        EXPECT_TRUE(ResultInt.eq(Huge)) << ResultInt << " vs " << Huge << "\n";
+        EXPECT_EQ(ConvertToStatus, APFloat::opOK);
+      }
+    }
+  }
+}
+
+TEST(APFloatTest, PPCDoubleDoubleConvertFromAPIntBoundary) {
+  const unsigned Binary64Precision =
+      APFloat::semanticsPrecision(APFloat::IEEEdouble());
+  APSInt Boundary =
+      APSInt::getMaxValue(Binary64Precision + 1, /*Unsigned=*/true);
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    const APFloat Exact = makeDoubleAPFloat(0x1p54, -0x1p0);
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(Boundary, /*IsSigned=*/false, RM);
+    EXPECT_EQ(ConvertFromStatus, APFloat::opOK);
+    EXPECT_EQ(F, Exact);
+  }
+
+  Boundary = APSInt{APInt::getHighBitsSet(/*numBits=*/128,
+                                          /*hiBitsSet=*/Binary64Precision + 1),
+                    /*isUnsigned=*/true};
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    const APFloat Exact = makeDoubleAPFloat(0x1p128, -0x1p74);
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(Boundary, /*IsSigned=*/false, RM);
+    EXPECT_EQ(ConvertFromStatus, APFloat::opOK);
+    EXPECT_EQ(F, Exact);
+  }
+}
+
+TEST(APFloatTest, PPCDoubleDoubleConvertFromAPIntEnormous) {
+  APFloat Largest = APFloat::getLargest(APFloat::PPCDoubleDouble());
+  int Exponent = ilogb(Largest);
+  unsigned BitWidth = Exponent + 1;
+  APSInt HugeInt{BitWidth, /*isUnsigned=*/true};
+  bool IsExact;
+  APFloat::opStatus Status =
+      Largest.convertToInteger(HugeInt, APFloat::rmTowardPositive, &IsExact);
+  ASSERT_EQ(Status, APFloat::opOK);
+  ASSERT_TRUE(IsExact);
+
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    EXPECT_EQ(ConvertFromStatus, APFloat::opOK);
+    EXPECT_EQ(F, Largest);
+  }
+
+  const unsigned MaxExponent =
+      APFloat::semanticsMaxExponent(APFloat::IEEEdouble());
+  const unsigned Binary64Precision =
+      APFloat::semanticsPrecision(APFloat::IEEEdouble());
+  const unsigned UlpOfLargest = MaxExponent - (2 * Binary64Precision);
+  const unsigned HalfUlpOfLargest = UlpOfLargest - 1;
+
+  // Add just under a half-ulp. This should never overflow for
+  // round-to-nearest modes.
+  HugeInt.setLowBits(HalfUlpOfLargest);
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    if (RM == APFloat::rmTowardPositive) {
+      EXPECT_TRUE(F.isPosInfinity()) << F;
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow);
+    } else {
+      EXPECT_EQ(F, Largest);
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact);
+    }
+  }
+
+  // Now test adding a half-ulp. This should cause overflow for ties-to-away.
+  // ties-to-even will not overflow if the max finite value has a clear low bit.
+  ++HugeInt;
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    const bool Overflow =
+        RM == APFloat::rmTowardPositive || RM == APFloat::rmNearestTiesToAway ||
+        (RM == APFloat::rmNearestTiesToEven && HugeInt[UlpOfLargest]);
+    if (Overflow) {
+      EXPECT_TRUE(F.isPosInfinity()) << F;
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow);
+    } else {
+      EXPECT_EQ(F, Largest);
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact);
+    }
+  }
+
+  // Now test adding just over a half-ulp. This should break all ties.
+  ++HugeInt;
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    const bool Overflow = RM == APFloat::rmTowardPositive ||
+                          RM == APFloat::rmNearestTiesToAway ||
+                          RM == APFloat::rmNearestTiesToEven;
+    if (Overflow) {
+      EXPECT_TRUE(F.isPosInfinity()) << F;
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow);
+    } else {
+      EXPECT_EQ(F, Largest);
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact);
+    }
+  }
+
+  HugeInt.setAllBits();
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    const bool Overflow = RM == APFloat::rmTowardPositive ||
+                          RM == APFloat::rmNearestTiesToAway ||
+                          RM == APFloat::rmNearestTiesToEven;
+    if (Overflow) {
+      EXPECT_TRUE(F.isPosInfinity()) << F;
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact | APFloat::opOverflow);
+    } else {
+      EXPECT_EQ(F, Largest);
+      EXPECT_EQ(ConvertFromStatus, APFloat::opInexact);
+    }
+  }
+
+  HugeInt.clearAllBits();
+  HugeInt.setBit(2 * Binary64Precision + 1);
+  HugeInt.setLowBits(Binary64Precision + 1);
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    const APFloat RoundUp = makeDoubleAPFloat(0x1p107, 0x1p54);
+    const APFloat RoundDown = makeDoubleAPFloat(0x1p107, 0x1.fffffffffffffp53);
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    EXPECT_EQ(ConvertFromStatus, APFloat::opInexact);
+
+    if (RM == APFloat::rmNearestTiesToEven ||
+        RM == APFloat::rmNearestTiesToAway || RM == APFloat::rmTowardPositive)
+      EXPECT_EQ(F, RoundUp);
+    else
+      EXPECT_EQ(F, RoundDown);
+  }
+
+  ++HugeInt;
+  // 162259276829213381405976519770112 can be represented exactly.
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    const APFloat Exact = makeDoubleAPFloat(0x1p107, 0x1p54);
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    EXPECT_EQ(ConvertFromStatus, APFloat::opOK);
+    EXPECT_EQ(F, Exact);
+  }
+
+  ++HugeInt;
+  // 162259276829213381405976519770113 rounds to either:
+  // 162259276829213381405976519770112
+  // 162259276829213381405976519770114
+  for (const APFloat::roundingMode RM :
+       {APFloat::rmNearestTiesToAway, APFloat::rmTowardNegative,
+        APFloat::rmTowardPositive, APFloat::rmTowardZero,
+        APFloat::rmNearestTiesToEven}) {
+    const APFloat RoundUp =
+        makeDoubleAPFloat(0x1.0000000000001p107, -0x1.fffffffffffffp53);
+    const APFloat RoundDown = makeDoubleAPFloat(0x1p107, 0x1p54);
+    EXPECT_LT(RoundDown, RoundUp);
+
+    APFloat F{APFloat::PPCDoubleDouble()};
+    const APFloat::opStatus ConvertFromStatus =
+        F.convertFromAPInt(HugeInt, /*IsSigned=*/false, RM);
+    EXPECT_EQ(ConvertFromStatus, APFloat::opInexact);
+
+    if (RM == APFloat::rmNearestTiesToAway || RM == APFloat::rmTowardPositive)
+      EXPECT_EQ(F, RoundUp);
+    else
+      EXPECT_EQ(F, RoundDown);
+  }
+}
+
 TEST(APFloatTest, x87Largest) {
   APFloat MaxX87Val = APFloat::getLargest(APFloat::x87DoubleExtended());
   EXPECT_TRUE(MaxX87Val.isLargest());


        


More information about the llvm-commits mailing list