[flang-commits] [flang] [flang][runtime] Enforce proper termination of list-directed input va… (PR #66244)

via flang-commits flang-commits at lists.llvm.org
Wed Sep 13 10:03:06 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-runtime
            
<details>
<summary>Changes</summary>
…lues

Emit an error at runtime when a list-directed input value is not followed by a value separator or end of record.  Previously, the runtime I/O library was consuming as many input characters that were valid for the type of the value, and leaving any remaining characters for the next input edit, if any.
--
Full diff: https://github.com/llvm/llvm-project/pull/66244.diff

3 Files Affected:

- (modified) flang/include/flang/Runtime/iostat.h (+1) 
- (modified) flang/runtime/edit-input.cpp (+107-32) 
- (modified) flang/runtime/iostat.cpp (+2) 


<pre>
diff --git a/flang/include/flang/Runtime/iostat.h b/flang/include/flang/Runtime/iostat.h
index faadaab8e90ffa9..0456e24f4e381ba 100644
--- a/flang/include/flang/Runtime/iostat.h
+++ b/flang/include/flang/Runtime/iostat.h
@@ -84,6 +84,7 @@ enum Iostat {
   IostatBadFlushUnit,
   IostatBadOpOnChildUnit,
   IostatBadNewUnit,
+  IostatBadListDirectedInputSeparator,
 };
 
 const char *IostatErrorString(int);
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 98627c9dd82750a..a89612ab74640fb 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -16,6 +16,49 @@
 
 namespace Fortran::runtime::io {
 
+// Checks that a list-directed input value has been entirely consumed and
+// doesn&#x27;t contain unparsed characters before the next value separator.
+static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) {
+  char32_t comma{
+      edit.modes.editingFlags & decimalComma ? char32_t{&#x27;;&#x27;} : char32_t{&#x27;,&#x27;}};
+  return ch == &#x27; &#x27; || ch == &#x27;\t&#x27; || ch == &#x27;/&#x27; || ch == comma;
+}
+
+static inline bool IsListDirectedFieldComplete(
+    IoStatementState &io, const DataEdit &edit) {
+  std::size_t byteCount;
+  if (auto ch{io.GetCurrentChar(byteCount)}) {
+    return IsCharValueSeparator(edit, *ch);
+  } else {
+    return true; // end of record: ok
+  }
+}
+
+static bool CheckCompleteListDirectedField(
+    IoStatementState &io, const DataEdit &edit) {
+  if (edit.IsListDirected()) {
+    std::size_t byteCount;
+    if (auto ch{io.GetCurrentChar(byteCount)}) {
+      if (IsCharValueSeparator(edit, *ch)) {
+        return true;
+      } else {
+        const auto &connection{io.GetConnectionState()};
+        io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
+            "invalid character (0x%x) after list-directed input value, "
+            "at column %d in record %d",
+            static_cast<unsigned>(*ch),
+            static_cast<int>(connection.positionInRecord + 1),
+            static_cast<int>(connection.currentRecordNumber));
+        return false;
+      }
+    } else {
+      return true; // end of record: ok
+    }
+  } else {
+    return true;
+  }
+}
+
 template <int LOG2_BASE>
 static bool EditBOZInput(
     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
@@ -89,28 +132,29 @@ static bool EditBOZInput(
     *data |= digit << shift;
     shift -= LOG2_BASE;
   }
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 static inline char32_t GetDecimalPoint(const DataEdit &edit) {
   return edit.modes.editingFlags & decimalComma ? char32_t{&#x27;,&#x27;} : char32_t{&#x27;.&#x27;};
 }
 
-// Prepares input from a field, and consumes the sign, if any.
-// Returns true if there&#x27;s a &#x27;-&#x27; sign.
-static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
+// Prepares input from a field, and returns the sign, if any, else &#x27;\0&#x27;.
+static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
     std::optional<char32_t> &next, std::optional<int> &remaining) {
   remaining = io.CueUpInput(edit);
   next = io.NextInField(remaining, edit);
-  bool negative{false};
+  char sign{&#x27;\0&#x27;};
   if (next) {
-    negative = *next == &#x27;-&#x27;;
-    if (negative || *next == &#x27;+&#x27;) {
-      io.SkipSpaces(remaining);
+    if (*next == &#x27;-&#x27; || *next == &#x27;+&#x27;) {
+      sign = *next;
+      if (!edit.IsListDirected()) {
+        io.SkipSpaces(remaining);
+      }
       next = io.NextInField(remaining, edit);
     }
   }
-  return negative;
+  return sign;
 }
 
 bool EditIntegerInput(
@@ -141,9 +185,9 @@ bool EditIntegerInput(
   }
   std::optional<int> remaining;
   std::optional<char32_t> next;
-  bool negate{ScanNumericPrefix(io, edit, next, remaining)};
+  char sign{ScanNumericPrefix(io, edit, next, remaining)};
   common::UnsignedInt128 value{0};
-  bool any{negate};
+  bool any{!!sign};
   bool overflow{false};
   for (; next; next = io.NextInField(remaining, edit)) {
     char32_t ch{*next};
@@ -178,13 +222,13 @@ bool EditIntegerInput(
     return false;
   }
   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
-  overflow |= value >= maxForKind && (value > maxForKind || !negate);
+  overflow |= value >= maxForKind && (value > maxForKind || sign != &#x27;-&#x27;);
   if (overflow) {
     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
         "Decimal input overflows INTEGER(%d) variable", kind);
     return false;
   }
-  if (negate) {
+  if (sign == &#x27;-&#x27;) {
     value = -value;
   }
   if (any || !io.GetConnectionState().IsAtEOF()) {
@@ -212,13 +256,17 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
     }
     ++got;
   }};
-  if (ScanNumericPrefix(io, edit, next, remaining)) {
+  char sign{ScanNumericPrefix(io, edit, next, remaining)};
+  if (sign == &#x27;-&#x27;) {
     Put(&#x27;-&#x27;);
   }
   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
-  if (!next || (!bzMode && *next == &#x27; &#x27;)) { // empty/blank field means zero
-    remaining.reset();
-    if (!io.GetConnectionState().IsAtEOF()) {
+  if (!next || (!bzMode && *next == &#x27; &#x27;)) {
+    if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
+      // An empty/blank field means zero when not list-directed.
+      // A fixed-width field containing only a sign is also zero;
+      // this behavior isn&#x27;t standard-conforming in F&#x27;2023 but it is
+      // required to pass FCVS.
       Put(&#x27;0&#x27;);
     }
     return got;
@@ -286,6 +334,10 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
       // the FCVS suite.
       Put(&#x27;0&#x27;); // emit at least one digit
     }
+    // In list-directed input, a bad exponent is not consumed.
+    auto nextBeforeExponent{next};
+    auto startExponent{io.GetConnectionState().positionInRecord};
+    bool hasGoodExponent{false};
     if (next &&
         (*next == &#x27;e&#x27; || *next == &#x27;E&#x27; || *next == &#x27;d&#x27; || *next == &#x27;D&#x27; ||
             *next == &#x27;q&#x27; || *next == &#x27;Q&#x27;)) {
@@ -306,11 +358,13 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
       }
       for (exponent = 0; next; next = io.NextInField(remaining, edit)) {
         if (*next >= &#x27;0&#x27; && *next <= &#x27;9&#x27;) {
+          hasGoodExponent = true;
           if (exponent < 10000) {
             exponent = 10 * exponent + *next - &#x27;0&#x27;;
           }
         } else if (*next == &#x27; &#x27; || *next == &#x27;\t&#x27;) {
           if (bzMode) {
+            hasGoodExponent = true;
             exponent = 10 * exponent;
           }
         } else {
@@ -321,6 +375,11 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
         exponent = -exponent;
       }
     }
+    if (!hasGoodExponent) {
+      // There isn&#x27;t a good exponent; do not consume it.
+      next = nextBeforeExponent;
+      io.HandleAbsolutePosition(startExponent);
+    }
     if (decimalPoint) {
       exponent += *decimalPoint;
     } else {
@@ -339,6 +398,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
   // input value.
   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
     if (next && (*next == &#x27; &#x27; || *next == &#x27;\t&#x27;)) {
+      io.SkipSpaces(remaining);
       next = io.NextInField(remaining, edit);
     }
     if (!next) { // NextInField fails on separators like &#x27;)&#x27;
@@ -423,19 +483,26 @@ static bool TryFastPathRealInput(
       return false;
     }
   }
-  for (; p < limit && (*p == &#x27; &#x27; || *p == &#x27;\t&#x27;); ++p) {
-  }
   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
-    // Need to consume a trailing &#x27;)&#x27; and any white space after
-    if (p >= limit || *p != &#x27;)&#x27;) {
+    // Need to consume a trailing &#x27;)&#x27;, possibly with leading spaces
+    for (; p < limit && (*p == &#x27; &#x27; || *p == &#x27;\t&#x27;); ++p) {
+    }
+    if (p < limit && *p == &#x27;)&#x27;) {
+      ++p;
+    } else {
+      return false;
+    }
+  } else if (edit.IsListDirected()) {
+    if (p < limit && !IsCharValueSeparator(edit, *p)) {
       return false;
     }
-    for (++p; p < limit && (*p == &#x27; &#x27; || *p == &#x27;\t&#x27;); ++p) {
+  } else {
+    for (; p < limit && (*p == &#x27; &#x27; || *p == &#x27;\t&#x27;); ++p) {
+    }
+    if (edit.width && p < str + *edit.width) {
+      return false; // unconverted characters remain in fixed width field
     }
   }
-  if (edit.width && p < str + *edit.width) {
-    return false; // unconverted characters remain in fixed width field
-  }
   // Success on the fast path!
   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
       converted.binary;
@@ -451,7 +518,7 @@ template <int KIND>
 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
   if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) {
-    return true;
+    return CheckCompleteListDirectedField(io, edit);
   }
   // Fast path wasn&#x27;t available or didn&#x27;t work; go the more general route
   static constexpr int maxDigits{
@@ -465,7 +532,11 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
     return false;
   }
   if (got == 0) {
-    io.GetIoErrorHandler().SignalError(IostatBadRealInput);
+    const auto &connection{io.GetConnectionState()};
+    io.GetIoErrorHandler().SignalError(IostatBadRealInput,
+        "Bad real input data at column %d of record %d",
+        static_cast<int>(connection.positionInRecord + 1),
+        static_cast<int>(connection.currentRecordNumber));
     return false;
   }
   bool hadExtra{got > maxDigits};
@@ -512,7 +583,11 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
         converted.flags | decimal::Inexact);
   }
   if (*p) { // unprocessed junk after value
-    io.GetIoErrorHandler().SignalError(IostatBadRealInput);
+    const auto &connection{io.GetConnectionState()};
+    io.GetIoErrorHandler().SignalError(IostatBadRealInput,
+        "Trailing characters after real input data at column %d of record %d",
+        static_cast<int>(connection.positionInRecord + 1),
+        static_cast<int>(connection.currentRecordNumber));
     return false;
   }
   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
@@ -525,7 +600,7 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
     }
     RaiseFPExceptions(converted.flags);
   }
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 template <int KIND>
@@ -602,13 +677,13 @@ bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
         "Bad character &#x27;%lc&#x27; in LOGICAL input field", *next);
     return false;
   }
-  if (remaining) { // ignore the rest of the field
+  if (remaining) { // ignore the rest of a fixed-width field
     io.HandleRelativePosition(*remaining);
   } else if (edit.descriptor == DataEdit::ListDirected) {
     while (io.NextInField(remaining, edit)) { // discard rest of field
     }
   }
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
@@ -800,7 +875,7 @@ bool EditCharacterInput(
   }
   // Pad the remainder of the input variable, if any.
   std::fill_n(x, length, &#x27; &#x27;);
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
diff --git a/flang/runtime/iostat.cpp b/flang/runtime/iostat.cpp
index d786e505433f8ec..cc5641693a078a6 100644
--- a/flang/runtime/iostat.cpp
+++ b/flang/runtime/iostat.cpp
@@ -113,6 +113,8 @@ const char *IostatErrorString(int iostat) {
     return "Impermissible I/O statement on child I/O unit";
   case IostatBadNewUnit:
     return "NEWUNIT= without FILE= or STATUS=&#x27;SCRATCH&#x27;";
+  case IostatBadListDirectedInputSeparator:
+    return "List-directed input value has trailing unused characters";
   default:
     return nullptr;
   }
</pre>
</details>


https://github.com/llvm/llvm-project/pull/66244


More information about the flang-commits mailing list