[flang-commits] [flang] [flang][runtime] Return +/-HUGE() for some real input roundings (PR #75525)

Thu Dec 14 12:20:58 PST 2023

https://github.com/klausler created https://github.com/llvm/llvm-project/pull/75525

The Fortran standard says that overflow input cases in some rounding modes (RZ, RD, RU) should round to a "representable" number.  Some Fortran compilers interpret this to mean +/-HUGE(), some as +/-Inf. Follow the precedent of gfortran and the Intel compilers.

>From c5f9e0b8438a80b16fbfd5e56e72d1dd9c39598b Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Thu, 14 Dec 2023 12:16:13 -0800
Subject: [PATCH] [flang][runtime] Return +/-HUGE() for some real input
 roundings

The Fortran standard says that overflow input cases in some rounding
modes (RZ, RD, RU) should round to a "representable" number.  Some
Fortran compilers interpret this to mean +/-HUGE(), some as +/-Inf.
Follow the precedent of gfortran and the Intel compilers.
---
 flang/lib/Decimal/big-radix-floating-point.h  |   6 +
 flang/lib/Decimal/decimal-to-binary.cpp       |  38 ++++--
 flang/runtime/edit-input.cpp                  |  15 ++-
 .../unittests/Runtime/NumericalFormatTest.cpp | 109 ++++++++++--------
 4 files changed, 110 insertions(+), 58 deletions(-)

diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 7d5d31b7788d76..2143d1d9b3f776 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -369,6 +369,12 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
     return result;
   }
+  constexpr Raw HUGE() const {
+    Raw result{static_cast<Raw>(Real::maxExponent)};
+    result <<= Real::significandBits;
+    result |= SignBit();
+    return result - 1; // decrement exponent, set all significand bits
+  }
 
   Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD
   int digits_{0}; // # of elements in digit_[] array; zero when zero
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
index d5b66b9fb93388..7ab78400d9f6b2 100644
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -237,6 +237,15 @@ template <int PREC> class IntermediateFloat {
   int exponent_{0};
 };
 
+// The standard says that these overflow cases round to "representable"
+// numbers, and some popular compilers interpret that to mean +/-HUGE()
+// rather than +/-Inf.
+static inline constexpr bool RoundOverflowToHuge(
+    enum FortranRounding rounding, bool isNegative) {
+  return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
+      (isNegative && rounding == RoundUp);
+}
+
 template <int PREC>
 ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
     bool isNegative, FortranRounding rounding) const {
@@ -259,8 +268,8 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
   if (fraction == 0 && guard <= oneHalf) {
     if ((!isNegative && rounding == RoundUp) ||
         (isNegative && rounding == RoundDown)) {
-      // round to minimum nonzero value
-    } else {
+      // round to least nonzero value
+    } else { // zero
       return {Binary{}, static_cast<enum ConversionResultFlags>(flags)};
     }
   } else {
@@ -303,12 +312,17 @@ ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
     expo = 0; // subnormal
   }
   if (expo >= Binary::maxExponent) {
-    expo = Binary::maxExponent; // Inf
-    flags |= Overflow;
-    if constexpr (Binary::bits == 80) { // x87
-      fraction = IntType{1} << 63;
-    } else {
-      fraction = 0;
+    if (RoundOverflowToHuge(rounding, isNegative)) {
+      expo = Binary::maxExponent - 1;
+      fraction = mask;
+    } else { // Inf
+      expo = Binary::maxExponent;
+      flags |= Overflow;
+      if constexpr (Binary::bits == 80) { // x87
+        fraction = IntType{1} << 63;
+      } else {
+        fraction = 0;
+      }
     }
   }
   using Raw = typename Binary::RawType;
@@ -344,8 +358,12 @@ BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
     } else { // underflow to +/-0.
       return {Real{SignBit()}, Inexact};
     }
-  } else if (exponent_ > crazy) { // overflow to +/-Inf.
-    return {Real{Infinity()}, Overflow};
+  } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
+    if (RoundOverflowToHuge(rounding_, isNegative_)) {
+      return {Real{HUGE()}};
+    } else {
+      return {Real{Infinity()}, Overflow};
+    }
   }
   // Apply any negative decimal exponent by multiplication
   // by a power of two, adjusting the binary exponent to compensate.
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 822099b5141b1b..79c156a43501df 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -640,20 +640,29 @@ decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
   }
   // Package & return result
   constexpr RawType significandMask{(one << RealType::significandBits) - 1};
+  int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
   if (!fraction) {
     expo = 0;
   } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
     expo = 0; // subnormal
   } else if (expo >= RealType::maxExponent) {
-    expo = RealType::maxExponent; // +/-Inf
-    fraction = 0;
+    if (rounding == decimal::RoundToZero ||
+        (rounding == decimal::RoundDown && !isNegative) ||
+        (rounding == decimal::RoundUp && isNegative)) {
+      expo = RealType::maxExponent - 1; // +/-HUGE()
+      fraction = significandMask;
+    } else {
+      expo = RealType::maxExponent; // +/-Inf
+      fraction = 0;
+      flags |= decimal::Overflow;
+    }
   } else {
     fraction &= significandMask; // remove explicit normalization unless x87
   }
   return decimal::ConversionToBinaryResult<binaryPrecision>{
       RealType{static_cast<RawType>(signBit |
           static_cast<RawType>(expo) << RealType::significandBits | fraction)},
-      (roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
+      static_cast<decimal::ConversionResultFlags>(flags)};
 }
 
 template <int KIND>
diff --git a/flang/unittests/Runtime/NumericalFormatTest.cpp b/flang/unittests/Runtime/NumericalFormatTest.cpp
index b5b8eb05943732..f5c19d2bd71ced 100644
--- a/flang/unittests/Runtime/NumericalFormatTest.cpp
+++ b/flang/unittests/Runtime/NumericalFormatTest.cpp
@@ -840,49 +840,66 @@ TEST(IOApiTests, FormatIntegerValues) {
 
 // Ensure double input values correctly map to raw uint64 values
 TEST(IOApiTests, EditDoubleInputValues) {
-  using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t>;
+  using TestCaseTy = std::tuple<const char *, const char *, std::uint64_t, int>;
+  int ovf{IostatRealInputOverflow};
   static const std::vector<TestCaseTy> testCases{
-      {"(F18.0)", "                 0", 0x0},
-      {"(F18.0)", "                  ", 0x0},
-      {"(F18.0)", "                -0", 0x8000000000000000},
-      {"(F18.0)", "                01", 0x3ff0000000000000},
-      {"(F18.0)", "                 1", 0x3ff0000000000000},
-      {"(F18.0)", "              125.", 0x405f400000000000},
-      {"(F18.0)", "              12.5", 0x4029000000000000},
-      {"(F18.0)", "              1.25", 0x3ff4000000000000},
-      {"(F18.0)", "             01.25", 0x3ff4000000000000},
-      {"(F18.0)", "              .125", 0x3fc0000000000000},
-      {"(F18.0)", "             0.125", 0x3fc0000000000000},
-      {"(F18.0)", "             .0625", 0x3fb0000000000000},
-      {"(F18.0)", "            0.0625", 0x3fb0000000000000},
-      {"(F18.0)", "               125", 0x405f400000000000},
-      {"(F18.1)", "               125", 0x4029000000000000},
-      {"(F18.2)", "               125", 0x3ff4000000000000},
-      {"(F18.3)", "               125", 0x3fc0000000000000},
-      {"(-1P,F18.0)", "               125", 0x4093880000000000}, // 1250
-      {"(1P,F18.0)", "               125", 0x4029000000000000}, // 12.5
-      {"(BZ,F18.0)", "              125 ", 0x4093880000000000}, // 1250
-      {"(BZ,F18.0)", "       125 . e +1 ", 0x42a6bcc41e900000}, // 1.25e13
-      {"(BZ,F18.0)", "           .      ", 0x0},
-      {"(BZ,F18.0)", "           . e +1 ", 0x0},
-      {"(DC,F18.0)", "              12,5", 0x4029000000000000},
-      {"(EX22.0)", "0X0P0                 ", 0x0}, // +0.
-      {"(EX22.0)", "-0X0P0                ", 0x8000000000000000}, // -0.
-      {"(EX22.0)", "0X.8P1                ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X8.P-3               ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X.1P4                ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X10.P-4              ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X8.00P-3             ", 0x3ff0000000000000}, // 1.0
-      {"(EX22.0)", "0X80.0P-6             ", 0x4000000000000000}, // 2.0
-      {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7   ", 0x3fb999999999999a}, // 0.1
-      {"(EX22.0)", "0X.8P-1021            ", 0x0010000000000000}, // min normal
-      {"(EX22.0)", "0X.8P-1022            ", 0x0008000000000000}, // subnormal
-      {"(EX22.0)", "0X.8P-1073            ", 0x0000000000000001}, // min subn.
-      {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff}, // max finite
-      {"(EX22.0)", "0X.8P1025             ", 0x7ff0000000000000}, // +Inf
-      {"(EX22.0)", "-0X.8P1025            ", 0xfff0000000000000}, // -Inf
+      {"(F18.0)", "                 0", 0x0, 0},
+      {"(F18.0)", "                  ", 0x0, 0},
+      {"(F18.0)", "                -0", 0x8000000000000000, 0},
+      {"(F18.0)", "                01", 0x3ff0000000000000, 0},
+      {"(F18.0)", "                 1", 0x3ff0000000000000, 0},
+      {"(F18.0)", "              125.", 0x405f400000000000, 0},
+      {"(F18.0)", "              12.5", 0x4029000000000000, 0},
+      {"(F18.0)", "              1.25", 0x3ff4000000000000, 0},
+      {"(F18.0)", "             01.25", 0x3ff4000000000000, 0},
+      {"(F18.0)", "              .125", 0x3fc0000000000000, 0},
+      {"(F18.0)", "             0.125", 0x3fc0000000000000, 0},
+      {"(F18.0)", "             .0625", 0x3fb0000000000000, 0},
+      {"(F18.0)", "            0.0625", 0x3fb0000000000000, 0},
+      {"(F18.0)", "               125", 0x405f400000000000, 0},
+      {"(F18.1)", "               125", 0x4029000000000000, 0},
+      {"(F18.2)", "               125", 0x3ff4000000000000, 0},
+      {"(F18.3)", "               125", 0x3fc0000000000000, 0},
+      {"(-1P,F18.0)", "               125", 0x4093880000000000, 0}, // 1250
+      {"(1P,F18.0)", "               125", 0x4029000000000000, 0}, // 12.5
+      {"(BZ,F18.0)", "              125 ", 0x4093880000000000, 0}, // 1250
+      {"(BZ,F18.0)", "       125 . e +1 ", 0x42a6bcc41e900000, 0}, // 1.25e13
+      {"(BZ,F18.0)", "           .      ", 0x0, 0},
+      {"(BZ,F18.0)", "           . e +1 ", 0x0, 0},
+      {"(DC,F18.0)", "              12,5", 0x4029000000000000, 0},
+      {"(EX22.0)", "0X0P0                 ", 0x0, 0}, // +0.
+      {"(EX22.0)", "-0X0P0                ", 0x8000000000000000, 0}, // -0.
+      {"(EX22.0)", "0X.8P1                ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X8.P-3               ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X.1P4                ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X10.P-4              ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X8.00P-3             ", 0x3ff0000000000000, 0}, // 1.0
+      {"(EX22.0)", "0X80.0P-6             ", 0x4000000000000000, 0}, // 2.0
+      {"(EX22.0)", "0XC.CCCCCCCCCCCDP-7   ", 0x3fb999999999999a, 0}, // 0.1
+      {"(EX22.0)", "0X.8P-1021            ", 0x0010000000000000,
+          0}, // min normal
+      {"(EX22.0)", "0X.8P-1022            ", 0x0008000000000000,
+          0}, // subnormal
+      {"(EX22.0)", "0X.8P-1073            ", 0x0000000000000001,
+          0}, // min subn.
+      {"(EX22.0)", "0X.FFFFFFFFFFFFF8P1024", 0x7fefffffffffffff,
+          0}, // max finite
+      {"(EX22.0)", "0X.8P1025             ", 0x7ff0000000000000, ovf}, // +Inf
+      {"(EX22.0)", "-0X.8P1025            ", 0xfff0000000000000, ovf}, // -Inf
+      {"(RZ,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RD,F7.0)", " 2.e308", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RU,F7.0)", " 2.e308", 0x7ff0000000000000, ovf}, // +Inf
+      {"(RZ,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+      {"(RD,F7.0)", "-2.e308", 0xfff0000000000000, ovf}, // -Inf
+      {"(RU,F7.0)", "-2.e308", 0xffefffffffffffff, 0}, // -HUGE()
+      {"(RZ,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RD,F7.0)", " 1.e999", 0x7fefffffffffffff, 0}, // +HUGE()
+      {"(RU,F7.0)", " 1.e999", 0x7ff0000000000000, ovf}, // +Inf
+      {"(RZ,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
+      {"(RD,F7.0)", "-1.e999", 0xfff0000000000000, ovf}, // -Inf
+      {"(RU,F7.0)", "-1.e999", 0xffefffffffffffff, 0}, // -HUGE()
   };
-  for (auto const &[format, data, want] : testCases) {
+  for (auto const &[format, data, want, iostat] : testCases) {
     auto cookie{IONAME(BeginInternalFormattedInput)(
         data, std::strlen(data), format, std::strlen(format))};
     union {
@@ -899,12 +916,14 @@ TEST(IOApiTests, EditDoubleInputValues) {
     char iomsg[bufferSize];
     std::memset(iomsg, '\0', bufferSize - 1);
 
-    // Ensure no errors were encountered reading input buffer into union value
+    // Ensure no unexpected errors were encountered reading input buffer into
+    // union value
     IONAME(GetIoMsg)(cookie, iomsg, bufferSize - 1);
     auto status{IONAME(EndIoStatement)(cookie)};
-    ASSERT_EQ(status, 0) << '\'' << format << "' failed reading '" << data
-                         << "', status " << static_cast<int>(status)
-                         << " iomsg '" << iomsg << "'";
+    ASSERT_EQ(status, iostat)
+        << '\'' << format << "' failed reading '" << data << "', status "
+        << static_cast<int>(status) << " != expected " << iostat << " iomsg '"
+        << iomsg << "'";
 
     // Ensure raw uint64 value matches expected conversion from double
     ASSERT_EQ(u.raw, want) << '\'' << format << "' failed reading '" << data