[llvm] 14a1b80 - Make IEEEFloat::roundToIntegral more standard conformant
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 21:51:48 PDT 2020
Author: Serge Pavlov
Date: 2020-03-11T10:38:46+07:00
New Revision: 14a1b80e044aac1947c891525cf30521be0a79b7
URL: https://github.com/llvm/llvm-project/commit/14a1b80e044aac1947c891525cf30521be0a79b7
DIFF: https://github.com/llvm/llvm-project/commit/14a1b80e044aac1947c891525cf30521be0a79b7.diff
LOG: Make IEEEFloat::roundToIntegral more standard conformant
Behavior of IEEEFloat::roundToIntegral is aligned with IEEE-754
operation roundToIntegralExact. In particular, this function now:
- returns opInvalidOp for signaling NaNs,
- returns opInexact if the result of rounding differs from the argument.
Differential Revision: https://reviews.llvm.org/D75246
Added:
Modified:
llvm/lib/Support/APFloat.cpp
llvm/unittests/ADT/APFloatTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 5379d29e139a..49f9cf8a32f8 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -1977,14 +1977,59 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
return fs;
}
-/* Rounding-mode corrrect round to integral value. */
+/* Rounding-mode correct round to integral value. */
IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
opStatus fs;
+ if (isInfinity())
+ // [IEEE Std 754-2008 6.1]:
+ // The behavior of infinity in floating-point arithmetic is derived from the
+ // limiting cases of real arithmetic with operands of arbitrarily
+ // large magnitude, when such a limit exists.
+ // ...
+ // Operations on infinite operands are usually exact and therefore signal no
+ // exceptions ...
+ return opOK;
+
+ if (isNaN()) {
+ if (isSignaling()) {
+ // [IEEE Std 754-2008 6.2]:
+ // Under default exception handling, any operation signaling an invalid
+ // operation exception and for which a floating-point result is to be
+ // delivered shall deliver a quiet NaN.
+ makeQuiet();
+ // [IEEE Std 754-2008 6.2]:
+ // Signaling NaNs shall be reserved operands that, under default exception
+ // handling, signal the invalid operation exception(see 7.2) for every
+ // general-computational and signaling-computational operation except for
+ // the conversions described in 5.12.
+ return opInvalidOp;
+ } else {
+ // [IEEE Std 754-2008 6.2]:
+ // For an operation with quiet NaN inputs, other than maximum and minimum
+ // operations, if a floating-point result is to be delivered the result
+ // shall be a quiet NaN which should be one of the input NaNs.
+ // ...
+ // Every general-computational and quiet-computational operation involving
+ // one or more input NaNs, none of them signaling, shall signal no
+ // exception, except fusedMultiplyAdd might signal the invalid operation
+ // exception(see 7.2).
+ return opOK;
+ }
+ }
+
+ if (isZero()) {
+ // [IEEE Std 754-2008 6.3]:
+ // ... the sign of the result of conversions, the quantize operation, the
+ // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
+ // the sign of the first or only operand.
+ return opOK;
+ }
+
// If the exponent is large enough, we know that this value is already
// integral, and the arithmetic below would potentially cause it to saturate
// to +/-Inf. Bail out early instead.
- if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
+ if (exponent+1 >= (int)semanticsPrecision(*semantics))
return opOK;
// The algorithm here is quite simple: we add 2^(p-1), where p is the
@@ -1998,19 +2043,18 @@ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
IEEEFloat MagicConstant(*semantics);
fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
rmNearestTiesToEven);
+ assert(fs == opOK);
MagicConstant.sign = sign;
- if (fs != opOK)
- return fs;
-
- // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
+ // Preserve the input sign so that we can handle the case of zero result
+ // correctly.
bool inputSign = isNegative();
fs = add(MagicConstant, rounding_mode);
- if (fs != opOK && fs != opInexact)
- return fs;
- fs = subtract(MagicConstant, rounding_mode);
+ // Current value and 'MagicConstant' are both integers, so the result of the
+ // subtraction is always exact according to Sterbenz' lemma.
+ subtract(MagicConstant, rounding_mode);
// Restore the input sign.
if (inputSign != isNegative())
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 7814d94ae4c6..b24b43d09a40 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -1525,6 +1525,124 @@ TEST(APFloatTest, roundToIntegral) {
P = APFloat::getInf(APFloat::IEEEdouble(), true);
P.roundToIntegral(APFloat::rmTowardZero);
EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() < 0.0);
+
+ APFloat::opStatus St;
+
+ P = APFloat::getNaN(APFloat::IEEEdouble());
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isNaN());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getNaN(APFloat::IEEEdouble(), true);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isNaN());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getSNaN(APFloat::IEEEdouble());
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isNaN());
+ EXPECT_FALSE(P.isSignaling());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opInvalidOp, St);
+
+ P = APFloat::getSNaN(APFloat::IEEEdouble(), true);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isNaN());
+ EXPECT_FALSE(P.isSignaling());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::opInvalidOp, St);
+
+ P = APFloat::getInf(APFloat::IEEEdouble());
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isInfinity());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getInf(APFloat::IEEEdouble(), true);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isInfinity());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getZero(APFloat::IEEEdouble(), false);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getZero(APFloat::IEEEdouble(), false);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getZero(APFloat::IEEEdouble(), true);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat::getZero(APFloat::IEEEdouble(), true);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat(1E-100);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(1E-100);
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(1.0, P.convertToDouble());
+ EXPECT_FALSE(P.isNegative());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(-1E-100);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(-1.0, P.convertToDouble());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(-1E-100);
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_TRUE(P.isZero());
+ EXPECT_TRUE(P.isNegative());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(10.0);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(10.0, P.convertToDouble());
+ EXPECT_EQ(APFloat::opOK, St);
+
+ P = APFloat(10.5);
+ St = P.roundToIntegral(APFloat::rmTowardZero);
+ EXPECT_EQ(10.0, P.convertToDouble());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(10.5);
+ St = P.roundToIntegral(APFloat::rmTowardPositive);
+ EXPECT_EQ(11.0, P.convertToDouble());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(10.5);
+ St = P.roundToIntegral(APFloat::rmTowardNegative);
+ EXPECT_EQ(10.0, P.convertToDouble());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(10.5);
+ St = P.roundToIntegral(APFloat::rmNearestTiesToAway);
+ EXPECT_EQ(11.0, P.convertToDouble());
+ EXPECT_EQ(APFloat::opInexact, St);
+
+ P = APFloat(10.5);
+ St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
+ EXPECT_EQ(10.0, P.convertToDouble());
+ EXPECT_EQ(APFloat::opInexact, St);
}
TEST(APFloatTest, isInteger) {
More information about the llvm-commits
mailing list