[llvm] ed6c309 - [APFloat] Fix truncation of certain subnormal numbers

Wed Jun 8 11:55:01 PDT 2022

Author: Danila Malyutin
Date: 2022-06-08T21:54:35+03:00
New Revision: ed6c309d4bf60b8a6abcf37a4e9d5b4bef96191b

URL: https://github.com/llvm/llvm-project/commit/ed6c309d4bf60b8a6abcf37a4e9d5b4bef96191b
DIFF: https://github.com/llvm/llvm-project/commit/ed6c309d4bf60b8a6abcf37a4e9d5b4bef96191b.diff

LOG: [APFloat] Fix truncation of certain subnormal numbers

Certain subnormals would be incorrectly rounded away from zero.

Fixes #55838

Differential Revision: https://reviews.llvm.org/D127140

Added: 
    

Modified: 
    llvm/lib/Support/APFloat.cpp
    llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
    llvm/unittests/ADT/APFloatTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 4b75c9db85263..2ae28fe066cd7 100644

--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -2213,8 +2213,11 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
   // when truncating from PowerPC double-double to double format), the
   // right shift could lose result mantissa bits.  Adjust exponent instead
   // of performing excessive shift.
+  // Also do a similar trick in case shifting denormal would produce zero
+  // significand as this case isn't handled correctly by normalize.
   if (shift < 0 && isFiniteNonZero()) {
-    int exponentChange = significandMSB() + 1 - fromSemantics.precision;
+    int omsb = significandMSB() + 1;
+    int exponentChange = omsb - fromSemantics.precision;
     if (exponent + exponentChange < toSemantics.minExponent)
       exponentChange = toSemantics.minExponent - exponent;
     if (exponentChange < shift)
@@ -2222,6 +2225,10 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
     if (exponentChange < 0) {
       shift -= exponentChange;
       exponent += exponentChange;
+    } else if (omsb <= -shift) {
+      exponentChange = omsb + shift - 1; // leave at least one bit set
+      shift -= exponentChange;
+      exponent += exponentChange;
     }
   }
 

diff  --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
index b977ef544090b..a2ed8cd6178d8 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
@@ -79,21 +79,17 @@ define float @trunc_denorm_lost_fraction0() {
   ret float %b
 }
 
-; FIXME: This should be 0.0.
-
 define float @trunc_denorm_lost_fraction1() {
 ; CHECK-LABEL: @trunc_denorm_lost_fraction1(
-; CHECK-NEXT:    ret float 0x36A0000000000000
+; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %b = fptrunc double 0x0000000010000001 to float
   ret float %b
 }
 
-; FIXME: This should be 0.0.
-
 define float @trunc_denorm_lost_fraction2() {
 ; CHECK-LABEL: @trunc_denorm_lost_fraction2(
-; CHECK-NEXT:    ret float 0x36A0000000000000
+; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %b = fptrunc double 0x000000001fffffff to float
   ret float %b
@@ -107,11 +103,9 @@ define float @trunc_denorm_lost_fraction3() {
   ret float %b
 }
 
-; FIXME: This should be -0.0.
-
 define float @trunc_denorm_lost_fraction4() {
 ; CHECK-LABEL: @trunc_denorm_lost_fraction4(
-; CHECK-NEXT:    ret float 0xB6A0000000000000
+; CHECK-NEXT:    ret float -0.000000e+00
 ;
   %b = fptrunc double 0x8000000010000001 to float
   ret float %b

diff  --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 1683a9d671734..a53eabedb63f8 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1859,6 +1859,48 @@ TEST(APFloatTest, convert) {
   EXPECT_EQ(0x7fc00000, test.bitcastToAPInt());
   EXPECT_TRUE(losesInfo);
   EXPECT_EQ(status, APFloat::opOK);
+
+  // Test that subnormals are handled correctly in double to float conversion
+  test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000000p-1022");
+  test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+
+  test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000001p-1022");
+  test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+
+  test = APFloat(APFloat::IEEEdouble(), "-0x0.0000010000001p-1022");
+  test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+
+  test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000000p-1022");
+  test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+
+  test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000001p-1022");
+  test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+
+  // Test subnormal conversion to bfloat
+  test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
+  test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(0.0f, test.convertToFloat());
+  EXPECT_TRUE(losesInfo);
+
+  test = APFloat(APFloat::IEEEsingle(), "0x0.02p-126");
+  test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
+  EXPECT_EQ(0x01, test.bitcastToAPInt());
+  EXPECT_FALSE(losesInfo);
+
+  test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
+  test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);
+  EXPECT_EQ(0x01, test.bitcastToAPInt());
+  EXPECT_TRUE(losesInfo);
 }
 
 TEST(APFloatTest, PPCDoubleDouble) {