[flang-commits] [flang] [flang][runtime] Fix fixed-width field internal wide character input (PR #74683)

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Sat Dec 9 07:44:41 PST 2023


https://github.com/klausler updated https://github.com/llvm/llvm-project/pull/74683

>From c4741dc477a1bdc312d4aa4e4d39a04aa8329b1b Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Wed, 6 Dec 2023 16:31:18 -0800
Subject: [PATCH] [flang][runtime] Fix fixed-width field internal wide
 character input

There was some confusion about units (bytes vs characters) in the
handling of the amount of input remaining in fixed-width formatted
input fields.  Clarify that any variable or parameter counting "remaining"
space in a field in the I/O runtime is always in units of bytes, and
make it so where it wasn't.  Rename many local variables so that their
units (characters or bytes) are clearer.

Fixes the bug(s) in llvm-test-suite/Fortran/gfortran/regression/char4_iunit_2.f03,
although the test still won't pass due to its dependence on gfortran's
list-directed output spacing.
---
 flang/runtime/edit-input.cpp | 101 ++++++++++++++++++-----------------
 flang/runtime/io-stmt.h      |  14 +++--
 2 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 4e8c9aa868a69..f38fb998d77ba 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -894,20 +894,20 @@ static bool EditListDirectedCharacterInput(
 }
 
 template <typename CHAR>
-bool EditCharacterInput(
-    IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
+bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
+    std::size_t lengthChars) {
   switch (edit.descriptor) {
   case DataEdit::ListDirected:
-    return EditListDirectedCharacterInput(io, x, length, edit);
+    return EditListDirectedCharacterInput(io, x, lengthChars, edit);
   case 'A':
   case 'G':
     break;
   case 'B':
-    return EditBOZInput<1>(io, edit, x, length * sizeof *x);
+    return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
   case 'O':
-    return EditBOZInput<3>(io, edit, x, length * sizeof *x);
+    return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
   case 'Z':
-    return EditBOZInput<4>(io, edit, x, length * sizeof *x);
+    return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
   default:
     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
@@ -915,27 +915,31 @@ bool EditCharacterInput(
     return false;
   }
   const ConnectionState &connection{io.GetConnectionState()};
-  std::size_t remaining{length};
+  std::size_t remainingChars{lengthChars};
+  // Skip leading characters.
+  // Their bytes don't count towards INQUIRE(IOLENGTH=).
+  std::size_t skipChars{0};
   if (edit.width && *edit.width > 0) {
-    remaining = *edit.width;
+    remainingChars = *edit.width;
+    if (remainingChars > lengthChars) {
+      skipChars = remainingChars - lengthChars;
+    }
   }
   // When the field is wider than the variable, we drop the leading
   // characters.  When the variable is wider than the field, there can be
   // trailing padding or an EOR condition.
   const char *input{nullptr};
-  std::size_t ready{0};
-  // Skip leading bytes.
-  // These bytes don't count towards INQUIRE(IOLENGTH=).
-  std::size_t skip{remaining > length ? remaining - length : 0};
+  std::size_t readyBytes{0};
   // Transfer payload bytes; these do count.
-  while (remaining > 0) {
-    if (ready == 0) {
-      ready = io.GetNextInputBytes(input);
-      if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
-        if (io.CheckForEndOfRecord(ready)) {
-          if (ready == 0) {
+  while (remainingChars > 0) {
+    if (readyBytes == 0) {
+      readyBytes = io.GetNextInputBytes(input);
+      if (readyBytes == 0 ||
+          (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
+        if (io.CheckForEndOfRecord(readyBytes)) {
+          if (readyBytes == 0) {
             // PAD='YES' and no more data
-            std::fill_n(x, length, ' ');
+            std::fill_n(x, lengthChars, ' ');
             return !io.GetIoErrorHandler().InError();
           } else {
             // Do partial read(s) then pad on last iteration
@@ -945,63 +949,64 @@ bool EditCharacterInput(
         }
       }
     }
-    std::size_t chunk;
-    bool skipping{skip > 0};
+    std::size_t chunkBytes;
+    std::size_t chunkChars{1};
+    bool skipping{skipChars > 0};
     if (connection.isUTF8) {
-      chunk = MeasureUTF8Bytes(*input);
+      chunkBytes = MeasureUTF8Bytes(*input);
       if (skipping) {
-        --skip;
+        --skipChars;
       } else if (auto ucs{DecodeUTF8(input)}) {
         *x++ = *ucs;
-        --length;
-      } else if (chunk == 0) {
+        --lengthChars;
+      } else if (chunkBytes == 0) {
         // error recovery: skip bad encoding
-        chunk = 1;
+        chunkBytes = 1;
       }
-      --remaining;
     } else if (connection.internalIoCharKind > 1) {
       // Reading from non-default character internal unit
-      chunk = connection.internalIoCharKind;
+      chunkBytes = connection.internalIoCharKind;
       if (skipping) {
-        --skip;
+        --skipChars;
       } else {
         char32_t buffer{0};
-        std::memcpy(&buffer, input, chunk);
+        std::memcpy(&buffer, input, chunkBytes);
         *x++ = buffer;
-        --length;
+        --lengthChars;
       }
-      --remaining;
     } else if constexpr (sizeof *x > 1) {
       // Read single byte with expansion into multi-byte CHARACTER
-      chunk = 1;
+      chunkBytes = 1;
       if (skipping) {
-        --skip;
+        --skipChars;
       } else {
         *x++ = static_cast<unsigned char>(*input);
-        --length;
+        --lengthChars;
       }
-      --remaining;
     } else { // single bytes -> default CHARACTER
       if (skipping) {
-        chunk = std::min<std::size_t>(skip, ready);
-        skip -= chunk;
+        chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
+        chunkChars = chunkBytes;
+        skipChars -= chunkChars;
       } else {
-        chunk = std::min<std::size_t>(remaining, ready);
-        std::memcpy(x, input, chunk);
-        x += chunk;
-        length -= chunk;
+        chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
+        chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
+        chunkChars = chunkBytes;
+        std::memcpy(x, input, chunkBytes);
+        x += chunkBytes;
+        lengthChars -= chunkChars;
       }
-      remaining -= chunk;
     }
-    input += chunk;
+    input += chunkBytes;
+    remainingChars -= chunkChars;
     if (!skipping) {
-      io.GotChar(chunk);
+      io.GotChar(chunkBytes);
     }
-    io.HandleRelativePosition(chunk);
-    ready -= chunk;
+    io.HandleRelativePosition(chunkBytes);
+    readyBytes -= chunkBytes;
   }
   // Pad the remainder of the input variable, if any.
-  std::fill_n(x, length, ' ');
+  std::fill_n(x, lengthChars, ' ');
   return CheckCompleteListDirectedField(io, edit);
 }
 
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index d4ceb83265246..91169f6c6e323 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -92,8 +92,8 @@ class IoStatementState {
   std::size_t GetNextInputBytes(const char *&);
   bool AdvanceRecord(int = 1);
   void BackspaceRecord();
-  void HandleRelativePosition(std::int64_t);
-  void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
+  void HandleRelativePosition(std::int64_t byteOffset);
+  void HandleAbsolutePosition(std::int64_t byteOffset); // for r* in list I/O
   std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
   ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
   bool BeginReadingRecord();
@@ -124,7 +124,11 @@ class IoStatementState {
   // Vacant after the end of the current record
   std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);
 
-  // For fixed-width fields, return the number of remaining characters.
+  // The "remaining" arguments to CueUpInput(), SkipSpaces(), & NextInField()
+  // are always in units of bytes, not characters; the distinction matters
+  // for internal input from CHARACTER(KIND=2 and 4).
+
+  // For fixed-width fields, return the number of remaining bytes.
   // Skip over leading blanks.
   std::optional<int> CueUpInput(const DataEdit &edit) {
     std::optional<int> remaining;
@@ -134,6 +138,10 @@ class IoStatementState {
     } else {
       if (edit.width.value_or(0) > 0) {
         remaining = *edit.width;
+        if (int bytesPerChar{GetConnectionState().internalIoCharKind};
+            bytesPerChar > 1) {
+          *remaining *= bytesPerChar;
+        }
       }
       SkipSpaces(remaining);
     }



More information about the flang-commits mailing list