[llvm] 14a1b80 - Make IEEEFloat::roundToIntegral more standard conformant

Tue Mar 10 21:51:48 PDT 2020

Author: Serge Pavlov
Date: 2020-03-11T10:38:46+07:00
New Revision: 14a1b80e044aac1947c891525cf30521be0a79b7

URL: https://github.com/llvm/llvm-project/commit/14a1b80e044aac1947c891525cf30521be0a79b7
DIFF: https://github.com/llvm/llvm-project/commit/14a1b80e044aac1947c891525cf30521be0a79b7.diff

LOG: Make IEEEFloat::roundToIntegral more standard conformant

Behavior of IEEEFloat::roundToIntegral is aligned with IEEE-754
operation roundToIntegralExact. In particular, this function now:
- returns opInvalid for signaling NaNs,
- returns opInexact if the result of rounding differs from argument.

Differential Revision: https://reviews.llvm.org/D75246

Added: 
    

Modified: 
    llvm/lib/Support/APFloat.cpp
    llvm/unittests/ADT/APFloatTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 5379d29e139a..49f9cf8a32f8 100644

--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -1977,14 +1977,59 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
   return fs;
 }
 
-/* Rounding-mode corrrect round to integral value.  */
+/* Rounding-mode correct round to integral value.  */
 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
   opStatus fs;
 
+  if (isInfinity())
+    // [IEEE Std 754-2008 6.1]:
+    // The behavior of infinity in floating-point arithmetic is derived from the
+    // limiting cases of real arithmetic with operands of arbitrarily
+    // large magnitude, when such a limit exists.
+    // ...
+    // Operations on infinite operands are usually exact and therefore signal no
+    // exceptions ...
+    return opOK;
+
+  if (isNaN()) {
+    if (isSignaling()) {
+      // [IEEE Std 754-2008 6.2]:
+      // Under default exception handling, any operation signaling an invalid
+      // operation exception and for which a floating-point result is to be
+      // delivered shall deliver a quiet NaN.
+      makeQuiet();
+      // [IEEE Std 754-2008 6.2]:
+      // Signaling NaNs shall be reserved operands that, under default exception
+      // handling, signal the invalid operation exception(see 7.2) for every
+      // general-computational and signaling-computational operation except for
+      // the conversions described in 5.12.
+      return opInvalidOp;
+    } else {
+      // [IEEE Std 754-2008 6.2]:
+      // For an operation with quiet NaN inputs, other than maximum and minimum
+      // operations, if a floating-point result is to be delivered the result
+      // shall be a quiet NaN which should be one of the input NaNs.
+      // ...
+      // Every general-computational and quiet-computational operation involving
+      // one or more input NaNs, none of them signaling, shall signal no
+      // exception, except fusedMultiplyAdd might signal the invalid operation
+      // exception(see 7.2).
+      return opOK;
+    }
+  }
+
+  if (isZero()) {
+    // [IEEE Std 754-2008 6.3]:
+    // ... the sign of the result of conversions, the quantize operation, the
+    // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
+    // the sign of the first or only operand.
+    return opOK;
+  }
+
   // If the exponent is large enough, we know that this value is already
   // integral, and the arithmetic below would potentially cause it to saturate
   // to +/-Inf.  Bail out early instead.
-  if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
+  if (exponent+1 >= (int)semanticsPrecision(*semantics))
     return opOK;
 
   // The algorithm here is quite simple: we add 2^(p-1), where p is the
@@ -1998,19 +2043,18 @@ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
   IEEEFloat MagicConstant(*semantics);
   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
                                       rmNearestTiesToEven);
+  assert(fs == opOK);
   MagicConstant.sign = sign;
 
-  if (fs != opOK)
-    return fs;
-
-  // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
+  // Preserve the input sign so that we can handle the case of zero result
+  // correctly.
   bool inputSign = isNegative();
 
   fs = add(MagicConstant, rounding_mode);
-  if (fs != opOK && fs != opInexact)
-    return fs;
 
-  fs = subtract(MagicConstant, rounding_mode);
+  // Current value and 'MagicConstant' are both integers, so the result of the
+  // subtraction is always exact according to Sterbenz' lemma.
+  subtract(MagicConstant, rounding_mode);
 
   // Restore the input sign.
   if (inputSign != isNegative())

diff  --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 7814d94ae4c6..b24b43d09a40 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1525,6 +1525,124 @@ TEST(APFloatTest, roundToIntegral) {
   P = APFloat::getInf(APFloat::IEEEdouble(), true);
   P.roundToIntegral(APFloat::rmTowardZero);
   EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() < 0.0);
+
+  APFloat::opStatus St;
+
+  P = APFloat::getNaN(APFloat::IEEEdouble());
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isNaN());
+  EXPECT_FALSE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat::getNaN(APFloat::IEEEdouble(), true);
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isNaN());
+  EXPECT_TRUE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat::getSNaN(APFloat::IEEEdouble());
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isNaN());
+  EXPECT_FALSE(P.isSignaling());
+  EXPECT_FALSE(P.isNegative());
+  EXPECT_EQ(APFloat::opInvalidOp, St);
+
+  P = APFloat::getSNaN(APFloat::IEEEdouble(), true);
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isNaN());
+  EXPECT_FALSE(P.isSignaling());
+  EXPECT_TRUE(P.isNegative());
+  EXPECT_EQ(APFloat::opInvalidOp, St);
+
+  P = APFloat::getInf(APFloat::IEEEdouble());
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isInfinity());
+  EXPECT_FALSE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat::getInf(APFloat::IEEEdouble(), true);
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isInfinity());
+  EXPECT_TRUE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat::getZero(APFloat::IEEEdouble(), false);
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isZero());
+  EXPECT_FALSE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat::getZero(APFloat::IEEEdouble(), false);
+  St = P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_TRUE(P.isZero());
+  EXPECT_FALSE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat::getZero(APFloat::IEEEdouble(), true);
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_TRUE(P.isZero());
+  EXPECT_TRUE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat::getZero(APFloat::IEEEdouble(), true);
+  St = P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_TRUE(P.isZero());
+  EXPECT_TRUE(P.isNegative());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat(1E-100);
+  St = P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_TRUE(P.isZero());
+  EXPECT_FALSE(P.isNegative());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(1E-100);
+  St = P.roundToIntegral(APFloat::rmTowardPositive);
+  EXPECT_EQ(1.0, P.convertToDouble());
+  EXPECT_FALSE(P.isNegative());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(-1E-100);
+  St = P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_TRUE(P.isNegative());
+  EXPECT_EQ(-1.0, P.convertToDouble());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(-1E-100);
+  St = P.roundToIntegral(APFloat::rmTowardPositive);
+  EXPECT_TRUE(P.isZero());
+  EXPECT_TRUE(P.isNegative());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(10.0);
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_EQ(10.0, P.convertToDouble());
+  EXPECT_EQ(APFloat::opOK, St);
+
+  P = APFloat(10.5);
+  St = P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_EQ(10.0, P.convertToDouble());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(10.5);
+  St = P.roundToIntegral(APFloat::rmTowardPositive);
+  EXPECT_EQ(11.0, P.convertToDouble());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(10.5);
+  St = P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_EQ(10.0, P.convertToDouble());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(10.5);
+  St = P.roundToIntegral(APFloat::rmNearestTiesToAway);
+  EXPECT_EQ(11.0, P.convertToDouble());
+  EXPECT_EQ(APFloat::opInexact, St);
+
+  P = APFloat(10.5);
+  St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
+  EXPECT_EQ(10.0, P.convertToDouble());
+  EXPECT_EQ(APFloat::opInexact, St);
 }
 
 TEST(APFloatTest, isInteger) {