[flang-commits] [flang] [flang][runtime] Enforce proper termination of list-directed input va… (PR #66244)

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Wed Sep 13 10:01:46 PDT 2023


https://github.com/klausler created https://github.com/llvm/llvm-project/pull/66244:

…lues

Emit an error at runtime when a list-directed input value is not followed by a value separator or end of record.  Previously, the runtime I/O library was consuming as many input characters that were valid for the type of the value, and leaving any remaining characters for the next input edit, if any.

>From bf9528703134ca5d739b8cb3b8641703cb2937c8 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Fri, 8 Sep 2023 11:49:34 -0700
Subject: [PATCH] [flang][runtime] Enforce proper termination of list-directed
 input values

Emit an error at runtime when a list-directed input value is
not followed by a value separator or end of record.  Previously,
the runtime I/O library was consuming as many input characters
that were valid for the type of the value, and leaving any
remaining characters for the next input edit, if any.
---
 flang/include/flang/Runtime/iostat.h |   1 +
 flang/runtime/edit-input.cpp         | 139 +++++++++++++++++++++------
 flang/runtime/iostat.cpp             |   2 +
 3 files changed, 110 insertions(+), 32 deletions(-)

diff --git a/flang/include/flang/Runtime/iostat.h b/flang/include/flang/Runtime/iostat.h
index faadaab8e90ffa9..0456e24f4e381ba 100644
--- a/flang/include/flang/Runtime/iostat.h
+++ b/flang/include/flang/Runtime/iostat.h
@@ -84,6 +84,7 @@ enum Iostat {
   IostatBadFlushUnit,
   IostatBadOpOnChildUnit,
   IostatBadNewUnit,
+  IostatBadListDirectedInputSeparator,
 };
 
 const char *IostatErrorString(int);
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 98627c9dd82750a..a89612ab74640fb 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -16,6 +16,49 @@
 
 namespace Fortran::runtime::io {
 
+// Checks that a list-directed input value has been entirely consumed and
+// doesn't contain unparsed characters before the next value separator.
+static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) {
+  char32_t comma{
+      edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
+  return ch == ' ' || ch == '\t' || ch == '/' || ch == comma;
+}
+
+static inline bool IsListDirectedFieldComplete(
+    IoStatementState &io, const DataEdit &edit) {
+  std::size_t byteCount;
+  if (auto ch{io.GetCurrentChar(byteCount)}) {
+    return IsCharValueSeparator(edit, *ch);
+  } else {
+    return true; // end of record: ok
+  }
+}
+
+static bool CheckCompleteListDirectedField(
+    IoStatementState &io, const DataEdit &edit) {
+  if (edit.IsListDirected()) {
+    std::size_t byteCount;
+    if (auto ch{io.GetCurrentChar(byteCount)}) {
+      if (IsCharValueSeparator(edit, *ch)) {
+        return true;
+      } else {
+        const auto &connection{io.GetConnectionState()};
+        io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
+            "invalid character (0x%x) after list-directed input value, "
+            "at column %d in record %d",
+            static_cast<unsigned>(*ch),
+            static_cast<int>(connection.positionInRecord + 1),
+            static_cast<int>(connection.currentRecordNumber));
+        return false;
+      }
+    } else {
+      return true; // end of record: ok
+    }
+  } else {
+    return true;
+  }
+}
+
 template <int LOG2_BASE>
 static bool EditBOZInput(
     IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
@@ -89,28 +132,29 @@ static bool EditBOZInput(
     *data |= digit << shift;
     shift -= LOG2_BASE;
   }
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 static inline char32_t GetDecimalPoint(const DataEdit &edit) {
   return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
 }
 
-// Prepares input from a field, and consumes the sign, if any.
-// Returns true if there's a '-' sign.
-static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
+// Prepares input from a field, and returns the sign, if any, else '\0'.
+static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
     std::optional<char32_t> &next, std::optional<int> &remaining) {
   remaining = io.CueUpInput(edit);
   next = io.NextInField(remaining, edit);
-  bool negative{false};
+  char sign{'\0'};
   if (next) {
-    negative = *next == '-';
-    if (negative || *next == '+') {
-      io.SkipSpaces(remaining);
+    if (*next == '-' || *next == '+') {
+      sign = *next;
+      if (!edit.IsListDirected()) {
+        io.SkipSpaces(remaining);
+      }
       next = io.NextInField(remaining, edit);
     }
   }
-  return negative;
+  return sign;
 }
 
 bool EditIntegerInput(
@@ -141,9 +185,9 @@ bool EditIntegerInput(
   }
   std::optional<int> remaining;
   std::optional<char32_t> next;
-  bool negate{ScanNumericPrefix(io, edit, next, remaining)};
+  char sign{ScanNumericPrefix(io, edit, next, remaining)};
   common::UnsignedInt128 value{0};
-  bool any{negate};
+  bool any{!!sign};
   bool overflow{false};
   for (; next; next = io.NextInField(remaining, edit)) {
     char32_t ch{*next};
@@ -178,13 +222,13 @@ bool EditIntegerInput(
     return false;
   }
   auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
-  overflow |= value >= maxForKind && (value > maxForKind || !negate);
+  overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
   if (overflow) {
     io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
         "Decimal input overflows INTEGER(%d) variable", kind);
     return false;
   }
-  if (negate) {
+  if (sign == '-') {
     value = -value;
   }
   if (any || !io.GetConnectionState().IsAtEOF()) {
@@ -212,13 +256,17 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
     }
     ++got;
   }};
-  if (ScanNumericPrefix(io, edit, next, remaining)) {
+  char sign{ScanNumericPrefix(io, edit, next, remaining)};
+  if (sign == '-') {
     Put('-');
   }
   bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
-  if (!next || (!bzMode && *next == ' ')) { // empty/blank field means zero
-    remaining.reset();
-    if (!io.GetConnectionState().IsAtEOF()) {
+  if (!next || (!bzMode && *next == ' ')) {
+    if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
+      // An empty/blank field means zero when not list-directed.
+      // A fixed-width field containing only a sign is also zero;
+      // this behavior isn't standard-conforming in F'2023 but it is
+      // required to pass FCVS.
       Put('0');
     }
     return got;
@@ -286,6 +334,10 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
       // the FCVS suite.
       Put('0'); // emit at least one digit
     }
+    // In list-directed input, a bad exponent is not consumed.
+    auto nextBeforeExponent{next};
+    auto startExponent{io.GetConnectionState().positionInRecord};
+    bool hasGoodExponent{false};
     if (next &&
         (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
             *next == 'q' || *next == 'Q')) {
@@ -306,11 +358,13 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
       }
       for (exponent = 0; next; next = io.NextInField(remaining, edit)) {
         if (*next >= '0' && *next <= '9') {
+          hasGoodExponent = true;
           if (exponent < 10000) {
             exponent = 10 * exponent + *next - '0';
           }
         } else if (*next == ' ' || *next == '\t') {
           if (bzMode) {
+            hasGoodExponent = true;
             exponent = 10 * exponent;
           }
         } else {
@@ -321,6 +375,11 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
         exponent = -exponent;
       }
     }
+    if (!hasGoodExponent) {
+      // There isn't a good exponent; do not consume it.
+      next = nextBeforeExponent;
+      io.HandleAbsolutePosition(startExponent);
+    }
     if (decimalPoint) {
       exponent += *decimalPoint;
     } else {
@@ -339,6 +398,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
   // input value.
   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
     if (next && (*next == ' ' || *next == '\t')) {
+      io.SkipSpaces(remaining);
       next = io.NextInField(remaining, edit);
     }
     if (!next) { // NextInField fails on separators like ')'
@@ -423,19 +483,26 @@ static bool TryFastPathRealInput(
       return false;
     }
   }
-  for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
-  }
   if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
-    // Need to consume a trailing ')' and any white space after
-    if (p >= limit || *p != ')') {
+    // Need to consume a trailing ')', possibly with leading spaces
+    for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
+    }
+    if (p < limit && *p == ')') {
+      ++p;
+    } else {
+      return false;
+    }
+  } else if (edit.IsListDirected()) {
+    if (p < limit && !IsCharValueSeparator(edit, *p)) {
       return false;
     }
-    for (++p; p < limit && (*p == ' ' || *p == '\t'); ++p) {
+  } else {
+    for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
+    }
+    if (edit.width && p < str + *edit.width) {
+      return false; // unconverted characters remain in fixed width field
     }
   }
-  if (edit.width && p < str + *edit.width) {
-    return false; // unconverted characters remain in fixed width field
-  }
   // Success on the fast path!
   *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
       converted.binary;
@@ -451,7 +518,7 @@ template <int KIND>
 bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
   constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
   if (TryFastPathRealInput<binaryPrecision>(io, edit, n)) {
-    return true;
+    return CheckCompleteListDirectedField(io, edit);
   }
   // Fast path wasn't available or didn't work; go the more general route
   static constexpr int maxDigits{
@@ -465,7 +532,11 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
     return false;
   }
   if (got == 0) {
-    io.GetIoErrorHandler().SignalError(IostatBadRealInput);
+    const auto &connection{io.GetConnectionState()};
+    io.GetIoErrorHandler().SignalError(IostatBadRealInput,
+        "Bad real input data at column %d of record %d",
+        static_cast<int>(connection.positionInRecord + 1),
+        static_cast<int>(connection.currentRecordNumber));
     return false;
   }
   bool hadExtra{got > maxDigits};
@@ -512,7 +583,11 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
         converted.flags | decimal::Inexact);
   }
   if (*p) { // unprocessed junk after value
-    io.GetIoErrorHandler().SignalError(IostatBadRealInput);
+    const auto &connection{io.GetConnectionState()};
+    io.GetIoErrorHandler().SignalError(IostatBadRealInput,
+        "Trailing characters after real input data at column %d of record %d",
+        static_cast<int>(connection.positionInRecord + 1),
+        static_cast<int>(connection.currentRecordNumber));
     return false;
   }
   *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
@@ -525,7 +600,7 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
     }
     RaiseFPExceptions(converted.flags);
   }
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 template <int KIND>
@@ -602,13 +677,13 @@ bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
         "Bad character '%lc' in LOGICAL input field", *next);
     return false;
   }
-  if (remaining) { // ignore the rest of the field
+  if (remaining) { // ignore the rest of a fixed-width field
     io.HandleRelativePosition(*remaining);
   } else if (edit.descriptor == DataEdit::ListDirected) {
     while (io.NextInField(remaining, edit)) { // discard rest of field
     }
   }
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 // See 13.10.3.1 paragraphs 7-9 in Fortran 2018
@@ -800,7 +875,7 @@ bool EditCharacterInput(
   }
   // Pad the remainder of the input variable, if any.
   std::fill_n(x, length, ' ');
-  return true;
+  return CheckCompleteListDirectedField(io, edit);
 }
 
 template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
diff --git a/flang/runtime/iostat.cpp b/flang/runtime/iostat.cpp
index d786e505433f8ec..cc5641693a078a6 100644
--- a/flang/runtime/iostat.cpp
+++ b/flang/runtime/iostat.cpp
@@ -113,6 +113,8 @@ const char *IostatErrorString(int iostat) {
     return "Impermissible I/O statement on child I/O unit";
   case IostatBadNewUnit:
     return "NEWUNIT= without FILE= or STATUS='SCRATCH'";
+  case IostatBadListDirectedInputSeparator:
+    return "List-directed input value has trailing unused characters";
   default:
     return nullptr;
   }



More information about the flang-commits mailing list