[flang-commits] [flang] [flang][runtime] Return +/-HUGE() for some real input roundings (PR #75525)
Peter Klausler via flang-commits
flang-commits at lists.llvm.org
Thu Dec 14 12:20:58 PST 2023
https://github.com/klausler created https://github.com/llvm/llvm-project/pull/75525
The Fortran standard says that overflow input cases in some rounding modes (RZ, RD, RU) should round to a "representable" number. Some Fortran compilers interpret this to mean +/-HUGE(), some as +/-Inf. Follow the precedent of gfortran and the Intel compilers.
From c5f9e0b8438a80b16fbfd5e56e72d1dd9c39598b Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Thu, 14 Dec 2023 12:16:13 -0800
Subject: [PATCH] [flang][runtime] Return +/-HUGE() for some real input
roundings
The Fortran standard says that overflow input cases in some rounding
modes (RZ, RD, RU) should round to a "representable" number. Some
Fortran compilers interpret this to mean +/-HUGE(), some as +/-Inf.
Follow the precedent of gfortran and the Intel compilers.
---
flang/lib/Decimal/big-radix-floating-point.h | 6 +
flang/lib/Decimal/decimal-to-binary.cpp | 38 ++++--
flang/runtime/edit-input.cpp | 15 ++-
.../unittests/Runtime/NumericalFormatTest.cpp | 109 ++++++++++--------
4 files changed, 110 insertions(+), 58 deletions(-)
diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 7d5d31b7788d76..2143d1d9b3f776 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -369,6 +369,12 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
}
return result;
}
+ constexpr Raw HUGE() const { // raw bits of largest finite magnitude
+ Raw result{static_cast<Raw>(Real::maxExponent)}; // max exponent field
+ result <<= Real::significandBits; // significand bits start as zero
+ result |= SignBit(); // carry the sign supplied by SignBit()
+ return result - 1; // decrement exponent, set all significand bits
+ }
Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD
int digits_{0}; // # of elements in digit_[] array; zero when zero
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
index d5b66b9fb93388..7ab78400d9f6b2 100644
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -237,6 +237,15 @@ template <int PREC> class IntermediateFloat {
int exponent_{0};
};
+// True when an overflowing input must yield +/-HUGE() rather than +/-Inf:
+// RZ always; RD for positive and RU for negative values (rounding toward
+// zero). The standard says "representable"; gfortran and Intel do this.
+static inline constexpr bool RoundOverflowToHuge(
+ enum FortranRounding rounding, bool isNegative) {
+ return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
+ (isNegative && rounding == RoundUp);
+}
+
template <int PREC>
ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
bool isNegative, FortranRounding rounding) const {
@@ -259,8 +268,8 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
if (fraction == 0 && guard <= oneHalf) {
if ((!isNegative && rounding == RoundUp) ||
(isNegative && rounding == RoundDown)) {
- // round to minimum nonzero value
- } else {
+ // round to least nonzero value
+ } else { // zero
return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
}
} else {
@@ -303,12 +312,17 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
expo = 0; // subnormal
}
if (expo >= Binary::maxExponent) {
- expo = Binary::maxExponent; // Inf
- flags |= Overflow;
- if constexpr (Binary::bits == 80) { // x87
- fraction = IntType{1} << 63;
- } else {
- fraction = 0;
+ if (RoundOverflowToHuge(rounding, isNegative)) {
+ expo = Binary::maxExponent - 1;
+ fraction = mask;
+ } else { // Inf
+ expo = Binary::maxExponent;
+ flags |= Overflow;
+ if constexpr (Binary::bits == 80) { // x87
+ fraction = IntType{1} << 63;
+ } else {
+ fraction = 0;
+ }
}
}
using Raw = typename Binary::RawType;
@@ -344,8 +358,12 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
} else { // underflow to +/-0.
return {Real{SignBit()}, Inexact};
}
- } else if (exponent_ > crazy) { // overflow to +/-Inf.
- return {Real{Infinity()}, Overflow};
+ } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
+ if (RoundOverflowToHuge(rounding_, isNegative_)) {
+ return {Real{HUGE()}};
+ } else {
+ return {Real{Infinity()}, Overflow};
+ }
}
// Apply any negative decimal exponent by multiplication
// by a power of two, adjusting the binary exponent to compensate.
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 822099b5141b1b..79c156a43501df 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -640,20 +640,29 @@ decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
}
// Package & return result
constexpr RawType significandMask{(one << RealType::significandBits) - 1};
+ int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
if (!fraction) {
expo = 0;
} else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
expo = 0; // subnormal
} else if (expo >= RealType::maxExponent) {
- expo = RealType::maxExponent; // +/-Inf
- fraction = 0;
+ if (rounding == decimal::RoundToZero ||
+ (rounding == decimal::RoundDown && !isNegative) ||
+ (rounding == decimal::RoundUp && isNegative)) {
+ expo = RealType::maxExponent - 1; // +/-HUGE()
+ fraction = significandMask;
+ } else {
+ expo = RealType::maxExponent; // +/-Inf
+ fraction = 0;
+ flags |= decimal::Overflow;
+ }
} else {
fraction &= significandMask; // remove explicit normalization unless x87
}
return decimal::ConversionToBinaryResult<binaryPrecision>{
RealType{static_cast<RawType>(signBit |
static_cast<RawType>(expo) << RealType::significandBits | fraction)},
- (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
+ static_cast<decimal::ConversionResultFlags>(flags)};
}
template <int KIND>
diff --git a/flang/unittests/Runtime/NumericalFormatTest.cpp b/flang/unittests/Runtime/NumericalFormatTest.cpp
index b5b8eb05943732..f5c19d2bd71ced 100644
--- a/flang/unittests/Runtime/NumericalFormatTest.cpp
+++ b/flang/unittests/Runtime/NumericalFormatTest.cpp
@@ -840,49 +840,66 @@ TEST(IOApiTests, FormatIntegerValues) {
// Ensure double input values correctly map to raw uint64 values
TEST(IOApiTests, EditDoubleInputValues) {
- using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t>;
+ using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t, int>;
+ int ovf{IostatRealInputOverflow};
static const std::vector<TestCaseTy> testCases{
- {"(F18.0)", " 0", 0x0},
- {"(F18.0)", " ", 0x0},
- {"(F18.0)", " -0", 0x8000000000000000},
- {"(F18.0)", " 01", 0x3ff0000000000000},
- {"(F18.0)", " 1", 0x3ff0000000000000},
- {"(F18.0)", " 125.", 0x405f400000000000},
- {"(F18.0)", " 12.5", 0x4029000000000000},
- {"(F18.0)", " 1.25", 0x3ff4000000000000},
- {"(F18.0)", " 01.25", 0x3ff4000000000000},
- {"(F18.0)", " .125", 0x3fc0000000000000},
- {"(F18.0)", " 0.125", 0x3fc0000000000000},
- {"(F18.0)", " .0625", 0x3fb0000000000000},
- {"(F18.0)", " 0.0625", 0x3fb0000000000000},
- {"(F18.0)", " 125", 0x405f400000000000},
- {"(F18.1)", " 125", 0x4029000000000000},
- {"(F18.2)", " 125", 0x3ff4000000000000},
- {"(F18.3)", " 125", 0x3fc0000000000000},
- {"(-1P,F18.0)", " 125", 0x4093880000000000}, // 1250
- {"(1P,F18.0)", " 125", 0x4029000000000000}, // 12.5
- {"(BZ,F18.0)", " 125 ", 0x4093880000000000}, // 1250
- {"(BZ,F18.0)", " 125 . e +1 ", 0x42a6bcc41e900000}, // 1.25e13
- {"(BZ,F18.0)", " . ", 0x0},
- {"(BZ,F18.0)", " . e +1 ", 0x0},
- {"(DC,F18.0)", " 12,5", 0x4029000000000000},
- {"(EX22.0)", "0X0P0 ", 0x0}, // +0.
- {"(EX22.0)", "-0X0P0 ", 0x8000000000000000}, // -0.
- {"(EX22.0)", "0X.8P1 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X8.P-3 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X.1P4 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X10.P-4 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X8.00P-3 ", 0x3ff0000000000000}, // 1.0
- {"(EX22.0)", "0X80.0P-6 ", 0x4000000000000000}, // 2.0
- {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7 ", 0x3fb999999999999a}, // 0.1
- {"(EX22.0)", "0X.8P-1021 ", 0x0010000000000000}, // min normal
- {"(EX22.0)", "0X.8P-1022 ", 0x0008000000000000}, // subnormal
- {"(EX22.0)", "0X.8P-1073 ", 0x0000000000000001}, // min subn.
- {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff}, // max finite
- {"(EX22.0)", "0X.8P1025 ", 0x7ff0000000000000}, // +Inf
- {"(EX22.0)", "-0X.8P1025 ", 0xfff0000000000000}, // -Inf
+ {"(F18.0)", " 0", 0x0, 0},
+ {"(F18.0)", " ", 0x0, 0},
+ {"(F18.0)", " -0", 0x8000000000000000, 0},
+ {"(F18.0)", " 01", 0x3ff0000000000000, 0},
+ {"(F18.0)", " 1", 0x3ff0000000000000, 0},
+ {"(F18.0)", " 125.", 0x405f400000000000, 0},
+ {"(F18.0)", " 12.5", 0x4029000000000000, 0},
+ {"(F18.0)", " 1.25", 0x3ff4000000000000, 0},
+ {"(F18.0)", " 01.25", 0x3ff4000000000000, 0},
+ {"(F18.0)", " .125", 0x3fc0000000000000, 0},
+ {"(F18.0)", " 0.125", 0x3fc0000000000000, 0},
+ {"(F18.0)", " .0625", 0x3fb0000000000000, 0},
+ {"(F18.0)", " 0.0625", 0x3fb0000000000000, 0},
+ {"(F18.0)", " 125", 0x405f400000000000, 0},
+ {"(F18.1)", " 125", 0x4029000000000000, 0},
+ {"(F18.2)", " 125", 0x3ff4000000000000, 0},
+ {"(F18.3)", " 125", 0x3fc0000000000000, 0},
+ {"(-1P,F18.0)", " 125", 0x4093880000000000, 0}, // 1250
+ {"(1P,F18.0)", " 125", 0x4029000000000000, 0}, // 12.5
+ {"(BZ,F18.0)", " 125 ", 0x4093880000000000, 0}, // 1250
+ {"(BZ,F18.0)", " 125 . e +1 ", 0x42a6bcc41e900000, 0}, // 1.25e13
+ {"(BZ,F18.0)", " . ", 0x0, 0},
+ {"(BZ,F18.0)", " . e +1 ", 0x0, 0},
+ {"(DC,F18.0)", " 12,5", 0x4029000000000000, 0},
+ {"(EX22.0)", "0X0P0 ", 0x0, 0}, // +0.
+ {"(EX22.0)", "-0X0P0 ", 0x8000000000000000, 0}, // -0.
+ {"(EX22.0)", "0X.8P1 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X8.P-3 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X.1P4 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X10.P-4 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X8.00P-3 ", 0x3ff0000000000000, 0}, // 1.0
+ {"(EX22.0)", "0X80.0P-6 ", 0x4000000000000000, 0}, // 2.0
+ {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7 ", 0x3fb999999999999a, 0}, // 0.1
+ {"(EX22.0)", "0X.8P-1021 ", 0x0010000000000000,
+ 0}, // min normal
+ {"(EX22.0)", "0X.8P-1022 ", 0x0008000000000000,
+ 0}, // subnormal
+ {"(EX22.0)", "0X.8P-1073 ", 0x0000000000000001,
+ 0}, // min subn.
+ {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff,
+ 0}, // max finite
+ {"(EX22.0)", "0X.8P1025 ", 0x7ff0000000000000, ovf}, // +Inf
+ {"(EX22.0)", "-0X.8P1025 ", 0xfff0000000000000, ovf}, // -Inf
+ {"(RZ,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RD,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RU,F7.0)", " 2.e308", 0x7ff0000000000000, ovf}, // +Inf
+ {"(RZ,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+ {"(RD,F7.0)", "-2.e308", 0xfff0000000000000, ovf}, // -Inf
+ {"(RU,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+ {"(RZ,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RD,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+ {"(RU,F7.0)", " 1.e999", 0x7ff0000000000000, ovf}, // +Inf
+ {"(RZ,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
+ {"(RD,F7.0)", "-1.e999", 0xfff0000000000000, ovf}, // -Inf
+ {"(RU,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
};
- for (auto const &[format, data, want] : testCases) {
+ for (auto const &[format, data, want, iostat] : testCases) {
auto cookie{IONAME(BeginInternalFormattedInput)(
data, std::strlen(data), format, std::strlen(format))};
union {
@@ -899,12 +916,14 @@ TEST(IOApiTests, EditDoubleInputValues) {
char iomsg[bufferSize];
std::memset(iomsg, '\0', bufferSize - 1);
- // Ensure no errors were encountered reading input buffer into union value
+ // Ensure no unexpected errors were encountered reading input buffer into
+ // union value
IONAME(GetIoMsg)(cookie, iomsg, bufferSize - 1);
auto status{IONAME(EndIoStatement)(cookie)};
- ASSERT_EQ(status, 0) << '\'' << format << "' failed reading '" << data
- << "', status " << static_cast<int>(status)
- << " iomsg '" << iomsg << "'";
+ ASSERT_EQ(status, iostat)
+ << '\'' << format << "' failed reading '" << data << "', status "
+ << static_cast<int>(status) << " != expected " << iostat << " iomsg '"
+ << iomsg << "'";
// Ensure raw uint64 value matches expected conversion from double
ASSERT_EQ(u.raw, want) << '\'' << format << "' failed reading '" << data
More information about the flang-commits mailing list