[llvm] [APFloat] Fix `IEEEFloat::addOrSubtractSignificand` and `IEEEFloat::normalize` (PR #98721)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 20 06:52:01 PDT 2024
https://github.com/beetrees updated https://github.com/llvm/llvm-project/pull/98721
>From 3f36539fc76dd544f2c90b2fff32d29ae3af2f71 Mon Sep 17 00:00:00 2001
From: beetrees <b at beetr.ee>
Date: Sat, 13 Jul 2024 07:50:08 +0100
Subject: [PATCH] [APFloat] Fix `IEEEFloat::addOrSubtractSignificand` and
`IEEEFloat::normalize`
---
llvm/lib/Support/APFloat.cpp | 51 ++++++++++++++------
llvm/unittests/ADT/APFloatTest.cpp | 75 ++++++++++++++++++++++++++++++
2 files changed, 111 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 3664de71d06df..67ca377ded23d 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -1607,7 +1607,8 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
/* Before rounding normalize the exponent of fcNormal numbers. */
omsb = significandMSB() + 1;
- if (omsb) {
+ // Skip this block only if the value is exactly zero (no set bits, no loss).
+ if (omsb || lost_fraction != lfExactlyZero) {
/* OMSB is numbered from 1. We want to place it in the integer
bit numbered PRECISION if possible, with a compensating change in
the exponent. */
@@ -1782,7 +1783,7 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
/* Add or subtract two normal numbers. */
lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
bool subtract) {
- integerPart carry;
+ integerPart carry = 0;
lostFraction lost_fraction;
int bits;
@@ -1796,11 +1797,13 @@ lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
/* Subtraction is more subtle than one might naively expect. */
if (subtract) {
IEEEFloat temp_rhs(rhs);
+ bool lost_fraction_is_from_rhs = false;
if (bits == 0)
lost_fraction = lfExactlyZero;
else if (bits > 0) {
lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
+ lost_fraction_is_from_rhs = true;
shiftSignificandLeft(1);
} else {
lost_fraction = shiftSignificandRight(-bits - 1);
@@ -1808,23 +1811,41 @@ lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
}
// Should we reverse the subtraction.
- if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
- carry = temp_rhs.subtractSignificand
- (*this, lost_fraction != lfExactlyZero);
+ cmpResult cmp_result = compareAbsoluteValue(temp_rhs);
+ if (cmp_result == cmpLessThan) {
+ bool borrow = false;
+ if (lost_fraction != lfExactlyZero && !lost_fraction_is_from_rhs) {
+ // The lost fraction is being subtracted, so borrow from the
+ // significand and invert `lost_fraction`.
+ borrow = true;
+ if (lost_fraction == lfLessThanHalf)
+ lost_fraction = lfMoreThanHalf;
+ else if (lost_fraction == lfMoreThanHalf)
+ lost_fraction = lfLessThanHalf;
+ }
+ carry = temp_rhs.subtractSignificand(*this, borrow);
copySignificand(temp_rhs);
sign = !sign;
- } else {
- carry = subtractSignificand
- (temp_rhs, lost_fraction != lfExactlyZero);
+ } else if (cmp_result == cmpGreaterThan) {
+ bool borrow = false;
+ if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
+ // The lost fraction is being subtracted, so borrow from the
+ // significand and invert `lost_fraction`.
+ borrow = true;
+ if (lost_fraction == lfLessThanHalf)
+ lost_fraction = lfMoreThanHalf;
+ else if (lost_fraction == lfMoreThanHalf)
+ lost_fraction = lfLessThanHalf;
+ }
+ carry = subtractSignificand(temp_rhs, borrow);
+ } else { // cmpEqual
+ zeroSignificand();
+ if (lost_fraction != lfExactlyZero && lost_fraction_is_from_rhs) {
+ // rhs is slightly larger due to the lost fraction, so flip the sign.
+ sign = !sign;
+ }
}
- /* Invert the lost fraction - it was on the RHS and
- subtracted. */
- if (lost_fraction == lfLessThanHalf)
- lost_fraction = lfMoreThanHalf;
- else if (lost_fraction == lfMoreThanHalf)
- lost_fraction = lfLessThanHalf;
-
/* The code above is intended to ensure that no borrow is
necessary. */
assert(!carry);
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 86a25f4394e19..6be4cfc33c3e4 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -560,6 +560,81 @@ TEST(APFloatTest, FMA) {
EXPECT_EQ(-8.85242279E-41f, f1.convertToFloat());
}
+ // `addOrSubtractSignificand` can be considered to have 9 possible cases
+ // when subtracting: all combinations of {cmpLessThan, cmpGreaterThan,
+ // cmpEqual} and {no loss, loss from lhs, loss from rhs}. Test each reachable
+ // case here.
+
+ // Regression test for failing the `assert(!carry)` in
+ // `addOrSubtractSignificand`, and for `normalize` skipping exponent
+ // normalization when the significand is zero but a fraction was lost.
+ // This tests cmpEqual, loss from lhs
+ {
+ APFloat f1(-1.4728589E-38f);
+ APFloat f2(3.7105144E-6f);
+ APFloat f3(5.5E-44f);
+ f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(-0.0f, f1.convertToFloat());
+ }
+
+ // Test cmpGreaterThan, no loss
+ {
+ APFloat f1(2.0f);
+ APFloat f2(2.0f);
+ APFloat f3(-3.5f);
+ f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(0.5f, f1.convertToFloat());
+ }
+
+ // Test cmpLessThan, no loss
+ {
+ APFloat f1(2.0f);
+ APFloat f2(2.0f);
+ APFloat f3(-4.5f);
+ f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(-0.5f, f1.convertToFloat());
+ }
+
+ // Test cmpEqual, no loss
+ {
+ APFloat f1(2.0f);
+ APFloat f2(2.0f);
+ APFloat f3(-4.0f);
+ f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(0.0f, f1.convertToFloat());
+ }
+
+ // Test cmpLessThan, loss from lhs
+ {
+ APFloat f1(2.0000002f);
+ APFloat f2(2.0000002f);
+ APFloat f3(-32.0f);
+ f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(-27.999998f, f1.convertToFloat());
+ }
+
+ // Test cmpGreaterThan, loss from rhs
+ {
+ APFloat f1(1e10f);
+ APFloat f2(1e10f);
+ APFloat f3(-2.0000002f);
+ f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(1e20f, f1.convertToFloat());
+ }
+
+ // Test cmpGreaterThan, loss from lhs
+ {
+ APFloat f1(1e-36f);
+ APFloat f2(0.0019531252f);
+ APFloat f3(-1e-45f);
+ f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(1.953124e-39f, f1.convertToFloat());
+ }
+
+ // {cmpEqual, cmpLessThan} with loss from rhs can't occur for the usage in
+ // `fusedMultiplyAdd` as `multiplySignificand` normalises the MSB of lhs to
+ // one bit below the top.
+
// Test using only a single instance of APFloat.
{
APFloat F(1.5);
More information about the llvm-commits
mailing list