[llvm] f494131 - [APFloat] Fix getExactInverse for DoubleAPFloat

David Majnemer via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 13 11:39:59 PDT 2025


Author: David Majnemer
Date: 2025-08-13T11:39:53-07:00
New Revision: f4941319cba19d7691baa6ec783c84be4d847637

URL: https://github.com/llvm/llvm-project/commit/f4941319cba19d7691baa6ec783c84be4d847637
DIFF: https://github.com/llvm/llvm-project/commit/f4941319cba19d7691baa6ec783c84be4d847637.diff

LOG: [APFloat] Fix getExactInverse for DoubleAPFloat

Some background: getExactInverse()'s callers expect that
the result is not subnormal.

DoubleAPFloat implemented getExactInverse() by going through
semPPCDoubleDoubleLegacy.

This means that numbers like 0x1p1022, which have a normal inverse in
semPPCDoubleDouble, do not have one in semPPCDoubleDoubleLegacy.

This commit refactors the logic into a single method on APFloat which
uses getExactLog2Abs() and scalbn() to calculate the inverse without
having to compute a reciprocal and test if it is inexact.
This approach works for both IEEEFloat and DoubleAPFloat.

Added: 
    

Modified: 
    llvm/include/llvm/ADT/APFloat.h
    llvm/lib/Support/APFloat.cpp
    llvm/unittests/ADT/APFloatTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 5e59ca9950226..f8bd995c9e28f 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -605,10 +605,6 @@ class IEEEFloat final {
                          unsigned FormatMaxPadding = 3,
                          bool TruncateZero = true) const;
 
-  /// If this value has an exact multiplicative inverse, store it in inv and
-  /// return true.
-  LLVM_ABI bool getExactInverse(APFloat *inv) const;
-
   LLVM_ABI LLVM_READONLY int getExactLog2Abs() const;
 
   LLVM_ABI friend int ilogb(const IEEEFloat &Arg);
@@ -886,8 +882,6 @@ class DoubleAPFloat final {
                          unsigned FormatMaxPadding,
                          bool TruncateZero = true) const;
 
-  LLVM_ABI bool getExactInverse(APFloat *inv) const;
-
   LLVM_ABI LLVM_READONLY int getExactLog2Abs() const;
 
   LLVM_ABI friend int ilogb(const DoubleAPFloat &X);
@@ -1500,9 +1494,9 @@ class APFloat : public APFloatBase {
   LLVM_DUMP_METHOD void dump() const;
 #endif
 
-  bool getExactInverse(APFloat *inv) const {
-    APFLOAT_DISPATCH_ON_SEMANTICS(getExactInverse(inv));
-  }
+  /// If this value has an exact, normal, multiplicative inverse, store it in
+  /// Inv (if non-null) and return true; otherwise return false.
+  bool getExactInverse(APFloat *Inv) const;
 
   // If this is an exact power of two, return the exponent while ignoring the
   // sign bit. If it's not an exact power of 2, return INT_MIN

diff  --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index d2a417ff87915..498d783c3a63a 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -4575,35 +4575,6 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
 
 }
 
-bool IEEEFloat::getExactInverse(APFloat *inv) const {
-  // Special floats and denormals have no exact inverse.
-  if (!isFiniteNonZero())
-    return false;
-
-  // Check that the number is a power of two by making sure that only the
-  // integer bit is set in the significand.
-  if (significandLSB() != semantics->precision - 1)
-    return false;
-
-  // Get the inverse.
-  IEEEFloat reciprocal(*semantics, 1ULL);
-  if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
-    return false;
-
-  // Avoid multiplication with a denormal, it is not safe on all platforms and
-  // may be slower than a normal division.
-  if (reciprocal.isDenormal())
-    return false;
-
-  assert(reciprocal.isFiniteNonZero() &&
-         reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
-
-  if (inv)
-    *inv = APFloat(reciprocal, *semantics);
-
-  return true;
-}
-
 int IEEEFloat::getExactLog2Abs() const {
   if (!isFinite() || isZero())
     return INT_MIN;
@@ -5731,17 +5702,6 @@ void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
 }
 
-bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
-  assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
-  APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
-  if (!inv)
-    return Tmp.getExactInverse(nullptr);
-  APFloat Inv(semPPCDoubleDoubleLegacy);
-  auto Ret = Tmp.getExactInverse(&Inv);
-  *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
-  return Ret;
-}
-
 int DoubleAPFloat::getExactLog2Abs() const {
   // In order for Hi + Lo to be a power of two, the following must be true:
   // 1. Hi must be a power of two.
@@ -5926,6 +5886,58 @@ FPClassTest APFloat::classify() const {
   return isSignaling() ? fcSNan : fcQNan;
 }
 
+bool APFloat::getExactInverse(APFloat *Inv) const {
+  // Only finite, non-zero numbers can have a useful, representable inverse.
+  // This check filters out +/- zero, +/- infinity, and NaN.
+  if (!isFiniteNonZero())
+    return false;
+
+  // A number has an exact, representable inverse if and only if it is a power
+  // of two.
+  //
+  // Mathematical Rationale:
+  // 1. A binary floating-point number x is a dyadic rational, meaning it can
+  //    be written as x = M / 2^k for integers M (the significand) and k.
+  // 2. The inverse is 1/x = 2^k / M.
+  // 3. For 1/x to also be a dyadic rational (and thus exactly representable
+  //    in binary), its denominator M must also be a power of two.
+  //    Let's say M = 2^m.
+  // 4. Substituting this back into the formula for x, we get
+  //    x = (2^m) / (2^k) = 2^(m-k).
+  //
+  // This proves that x must be a power of two.
+
+  // getExactLog2Abs() returns the integer exponent if the number is a power of
+  // two or INT_MIN if it is not.
+  const int Exp = getExactLog2Abs();
+  if (Exp == INT_MIN)
+    return false;
+
+  // The inverse of +/- 2^Exp is +/- 2^(-Exp). We can compute this by
+  // scaling 1.0 by the negated exponent.
+  APFloat Reciprocal =
+      scalbn(APFloat::getOne(getSemantics(), /*Negative=*/isNegative()), -Exp,
+             rmTowardZero);
+
+  // scalbn might round if the resulting exponent -Exp is outside the
+  // representable range, causing overflow (to infinity) or underflow. We
+  // must verify that the result is still the exact power of two we expect.
+  if (Reciprocal.getExactLog2Abs() != -Exp)
+    return false;
+
+  // Avoid multiplication with a subnormal, it is not safe on all platforms and
+  // may be slower than a normal division.
+  if (Reciprocal.isDenormal())
+    return false;
+
+  assert(Reciprocal.isFiniteNonZero());
+
+  if (Inv)
+    *Inv = std::move(Reciprocal);
+
+  return true;
+}
+
 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
                                    roundingMode RM, bool *losesInfo) {
   if (&getSemantics() == &ToSemantics) {

diff  --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index b518ef854d9a1..34e2b1a5f4b20 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1918,6 +1918,15 @@ TEST(APFloatTest, exactInverse) {
   EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0.5")));
   EXPECT_TRUE(APFloat(APFloat::x87DoubleExtended(), "2.0").getExactInverse(&inv));
   EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::x87DoubleExtended(), "0.5")));
+  // 0x1p1022 has a normal inverse for IEEE 754 binary64: 0x1p-1022.
+  EXPECT_TRUE(APFloat(0x1p1022).getExactInverse(&inv));
+  EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0x1p-1022)));
+  // With regard to getExactInverse, IEEEdouble and PPCDoubleDouble should
+  // behave the same.
+  EXPECT_TRUE(
+      APFloat(APFloat::PPCDoubleDouble(), "0x1p1022").getExactInverse(&inv));
+  EXPECT_TRUE(
+      inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0x1p-1022")));
 
   // FLT_MIN
   EXPECT_TRUE(APFloat(1.17549435e-38f).getExactInverse(&inv));
@@ -6661,13 +6670,12 @@ TEST_P(PPCDoubleDoubleFrexpValueTest, PPCDoubleDoubleFrexp) {
 
     int ActualExponent;
     const APFloat ActualFraction = frexp(Input, ActualExponent, RM);
-    if (ExpectedFraction.isNaN()) {
+    if (ExpectedFraction.isNaN())
       EXPECT_TRUE(ActualFraction.isNaN());
-    } else {
+    else
       EXPECT_EQ(ActualFraction.compare(ExpectedFraction), APFloat::cmpEqual)
           << ActualFraction << " vs " << ExpectedFraction << " for input "
           << Params.Input.Hi << " + " << Params.Input.Lo << " RM " << RM;
-    }
     EXPECT_EQ(ActualExponent, Expected.Exponent)
         << "for input " << Params.Input.Hi << " + " << Params.Input.Lo
         << " RM " << RM;


        


More information about the llvm-commits mailing list