[llvm] ed6c309 - [APFloat] Fix truncation of certain subnormal numbers
Danila Malyutin via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 8 11:55:01 PDT 2022
Author: Danila Malyutin
Date: 2022-06-08T21:54:35+03:00
New Revision: ed6c309d4bf60b8a6abcf37a4e9d5b4bef96191b
URL: https://github.com/llvm/llvm-project/commit/ed6c309d4bf60b8a6abcf37a4e9d5b4bef96191b
DIFF: https://github.com/llvm/llvm-project/commit/ed6c309d4bf60b8a6abcf37a4e9d5b4bef96191b.diff
LOG: [APFloat] Fix truncation of certain subnormal numbers
Certain subnormal values were incorrectly rounded away from zero (instead of to zero) when truncated to a narrower floating-point format.
Fixes #55838
Differential Revision: https://reviews.llvm.org/D127140
Added:
Modified:
llvm/lib/Support/APFloat.cpp
llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
llvm/unittests/ADT/APFloatTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 4b75c9db85263..2ae28fe066cd7 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -2213,8 +2213,11 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
// when truncating from PowerPC double-double to double format), the
// right shift could lose result mantissa bits. Adjust exponent instead
// of performing excessive shift.
+ // Also do a similar trick in case shifting denormal would produce zero
+ // significand as this case isn't handled correctly by normalize.
if (shift < 0 && isFiniteNonZero()) {
- int exponentChange = significandMSB() + 1 - fromSemantics.precision;
+ int omsb = significandMSB() + 1;
+ int exponentChange = omsb - fromSemantics.precision;
if (exponent + exponentChange < toSemantics.minExponent)
exponentChange = toSemantics.minExponent - exponent;
if (exponentChange < shift)
@@ -2222,6 +2225,10 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
if (exponentChange < 0) {
shift -= exponentChange;
exponent += exponentChange;
+ } else if (omsb <= -shift) {
+ exponentChange = omsb + shift - 1; // leave at least one bit set
+ shift -= exponentChange;
+ exponent += exponentChange;
}
}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
index b977ef544090b..a2ed8cd6178d8 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
@@ -79,21 +79,17 @@ define float @trunc_denorm_lost_fraction0() {
ret float %b
}
-; FIXME: This should be 0.0.
-
define float @trunc_denorm_lost_fraction1() {
; CHECK-LABEL: @trunc_denorm_lost_fraction1(
-; CHECK-NEXT: ret float 0x36A0000000000000
+; CHECK-NEXT: ret float 0.000000e+00
;
%b = fptrunc double 0x0000000010000001 to float
ret float %b
}
-; FIXME: This should be 0.0.
-
define float @trunc_denorm_lost_fraction2() {
; CHECK-LABEL: @trunc_denorm_lost_fraction2(
-; CHECK-NEXT: ret float 0x36A0000000000000
+; CHECK-NEXT: ret float 0.000000e+00
;
%b = fptrunc double 0x000000001fffffff to float
ret float %b
@@ -107,11 +103,9 @@ define float @trunc_denorm_lost_fraction3() {
ret float %b
}
-; FIXME: This should be -0.0.
-
define float @trunc_denorm_lost_fraction4() {
; CHECK-LABEL: @trunc_denorm_lost_fraction4(
-; CHECK-NEXT: ret float 0xB6A0000000000000
+; CHECK-NEXT: ret float -0.000000e+00
;
%b = fptrunc double 0x8000000010000001 to float
ret float %b
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 1683a9d671734..a53eabedb63f8 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1859,6 +1859,48 @@ TEST(APFloatTest, convert) {
EXPECT_EQ(0x7fc00000, test.bitcastToAPInt());
EXPECT_TRUE(losesInfo);
EXPECT_EQ(status, APFloat::opOK);
+
+ // Test that subnormals are handled correctly in double to float conversion
+ test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000000p-1022");
+ test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000001p-1022");
+ test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::IEEEdouble(), "-0x0.0000010000001p-1022");
+ test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000000p-1022");
+ test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000001p-1022");
+ test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+
+ // Test subnormal conversion to bfloat
+ test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
+ test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(0.0f, test.convertToFloat());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::IEEEsingle(), "0x0.02p-126");
+ test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(0x01, test.bitcastToAPInt());
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
+ test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);
+ EXPECT_EQ(0x01, test.bitcastToAPInt());
+ EXPECT_TRUE(losesInfo);
}
TEST(APFloatTest, PPCDoubleDouble) {
More information about the llvm-commits mailing list