[flang-commits] [flang] 353d56d - [flang][runtime] Fix fixed-width field internal wide character input (#74683)

via flang-commits flang-commits at lists.llvm.org
Mon Dec 11 13:05:34 PST 2023


Author: Peter Klausler
Date: 2023-12-11T13:05:29-08:00
New Revision: 353d56d22bfb77ee48554705c50ee71b115854fd

URL: https://github.com/llvm/llvm-project/commit/353d56d22bfb77ee48554705c50ee71b115854fd
DIFF: https://github.com/llvm/llvm-project/commit/353d56d22bfb77ee48554705c50ee71b115854fd.diff

LOG: [flang][runtime] Fix fixed-width field internal wide character input (#74683)

The handling of the amount of input remaining in fixed-width formatted
input fields confused its units (bytes vs. characters). Clarify that any
variable or parameter counting "remaining" space in a field in the I/O
runtime is always in units of bytes, and fix the places where that was
not already the case.

Fixes the bug(s) in
llvm-test-suite/Fortran/gfortran/regression/char4_iunit_2.f03, although
the test still won't pass due to its dependence on gfortran's
list-directed output spacing.

Added: 
    

Modified: 
    flang/runtime/edit-input.cpp
    flang/runtime/io-stmt.h

Removed: 
    


################################################################################
diff  --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 7a868d56f075e..822099b5141b1 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -916,20 +916,20 @@ static bool EditListDirectedCharacterInput(
 }
 
 template <typename CHAR>
-bool EditCharacterInput(
-    IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
+bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
+    std::size_t lengthChars) {
   switch (edit.descriptor) {
   case DataEdit::ListDirected:
-    return EditListDirectedCharacterInput(io, x, length, edit);
+    return EditListDirectedCharacterInput(io, x, lengthChars, edit);
   case 'A':
   case 'G':
     break;
   case 'B':
-    return EditBOZInput<1>(io, edit, x, length * sizeof *x);
+    return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
   case 'O':
-    return EditBOZInput<3>(io, edit, x, length * sizeof *x);
+    return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
   case 'Z':
-    return EditBOZInput<4>(io, edit, x, length * sizeof *x);
+    return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
   default:
     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
         "Data edit descriptor '%c' may not be used with a CHARACTER data item",
@@ -937,27 +937,31 @@ bool EditCharacterInput(
     return false;
   }
   const ConnectionState &connection{io.GetConnectionState()};
-  std::size_t remaining{length};
+  std::size_t remainingChars{lengthChars};
+  // Skip leading characters.
+  // Their bytes don't count towards INQUIRE(IOLENGTH=).
+  std::size_t skipChars{0};
   if (edit.width && *edit.width > 0) {
-    remaining = *edit.width;
+    remainingChars = *edit.width;
+    if (remainingChars > lengthChars) {
+      skipChars = remainingChars - lengthChars;
+    }
   }
   // When the field is wider than the variable, we drop the leading
   // characters.  When the variable is wider than the field, there can be
   // trailing padding or an EOR condition.
   const char *input{nullptr};
-  std::size_t ready{0};
-  // Skip leading bytes.
-  // These bytes don't count towards INQUIRE(IOLENGTH=).
-  std::size_t skip{remaining > length ? remaining - length : 0};
+  std::size_t readyBytes{0};
   // Transfer payload bytes; these do count.
-  while (remaining > 0) {
-    if (ready == 0) {
-      ready = io.GetNextInputBytes(input);
-      if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
-        if (io.CheckForEndOfRecord(ready)) {
-          if (ready == 0) {
+  while (remainingChars > 0) {
+    if (readyBytes == 0) {
+      readyBytes = io.GetNextInputBytes(input);
+      if (readyBytes == 0 ||
+          (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
+        if (io.CheckForEndOfRecord(readyBytes)) {
+          if (readyBytes == 0) {
             // PAD='YES' and no more data
-            std::fill_n(x, length, ' ');
+            std::fill_n(x, lengthChars, ' ');
             return !io.GetIoErrorHandler().InError();
           } else {
             // Do partial read(s) then pad on last iteration
@@ -967,63 +971,64 @@ bool EditCharacterInput(
         }
       }
     }
-    std::size_t chunk;
-    bool skipping{skip > 0};
+    std::size_t chunkBytes;
+    std::size_t chunkChars{1};
+    bool skipping{skipChars > 0};
     if (connection.isUTF8) {
-      chunk = MeasureUTF8Bytes(*input);
+      chunkBytes = MeasureUTF8Bytes(*input);
       if (skipping) {
-        --skip;
+        --skipChars;
       } else if (auto ucs{DecodeUTF8(input)}) {
         *x++ = *ucs;
-        --length;
-      } else if (chunk == 0) {
+        --lengthChars;
+      } else if (chunkBytes == 0) {
         // error recovery: skip bad encoding
-        chunk = 1;
+        chunkBytes = 1;
       }
-      --remaining;
     } else if (connection.internalIoCharKind > 1) {
       // Reading from non-default character internal unit
-      chunk = connection.internalIoCharKind;
+      chunkBytes = connection.internalIoCharKind;
       if (skipping) {
-        --skip;
+        --skipChars;
       } else {
         char32_t buffer{0};
-        std::memcpy(&buffer, input, chunk);
+        std::memcpy(&buffer, input, chunkBytes);
         *x++ = buffer;
-        --length;
+        --lengthChars;
       }
-      --remaining;
     } else if constexpr (sizeof *x > 1) {
       // Read single byte with expansion into multi-byte CHARACTER
-      chunk = 1;
+      chunkBytes = 1;
       if (skipping) {
-        --skip;
+        --skipChars;
       } else {
         *x++ = static_cast<unsigned char>(*input);
-        --length;
+        --lengthChars;
       }
-      --remaining;
     } else { // single bytes -> default CHARACTER
       if (skipping) {
-        chunk = std::min<std::size_t>(skip, ready);
-        skip -= chunk;
+        chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
+        chunkChars = chunkBytes;
+        skipChars -= chunkChars;
       } else {
-        chunk = std::min<std::size_t>(remaining, ready);
-        std::memcpy(x, input, chunk);
-        x += chunk;
-        length -= chunk;
+        chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
+        chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
+        chunkChars = chunkBytes;
+        std::memcpy(x, input, chunkBytes);
+        x += chunkBytes;
+        lengthChars -= chunkChars;
       }
-      remaining -= chunk;
     }
-    input += chunk;
+    input += chunkBytes;
+    remainingChars -= chunkChars;
     if (!skipping) {
-      io.GotChar(chunk);
+      io.GotChar(chunkBytes);
     }
-    io.HandleRelativePosition(chunk);
-    ready -= chunk;
+    io.HandleRelativePosition(chunkBytes);
+    readyBytes -= chunkBytes;
   }
   // Pad the remainder of the input variable, if any.
-  std::fill_n(x, length, ' ');
+  std::fill_n(x, lengthChars, ' ');
   return CheckCompleteListDirectedField(io, edit);
 }
 

diff  --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index d4ceb83265246..91169f6c6e323 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -92,8 +92,8 @@ class IoStatementState {
   std::size_t GetNextInputBytes(const char *&);
   bool AdvanceRecord(int = 1);
   void BackspaceRecord();
-  void HandleRelativePosition(std::int64_t);
-  void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
+  void HandleRelativePosition(std::int64_t byteOffset);
+  void HandleAbsolutePosition(std::int64_t byteOffset); // for r* in list I/O
   std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
   ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
   bool BeginReadingRecord();
@@ -124,7 +124,11 @@ class IoStatementState {
   // Vacant after the end of the current record
   std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);
 
-  // For fixed-width fields, return the number of remaining characters.
+  // The "remaining" arguments to CueUpInput(), SkipSpaces(), & NextInField()
+  // are always in units of bytes, not characters; the distinction matters
+  // for internal input from CHARACTER(KIND=2 and 4).
+
+  // For fixed-width fields, return the number of remaining bytes.
   // Skip over leading blanks.
   std::optional<int> CueUpInput(const DataEdit &edit) {
     std::optional<int> remaining;
@@ -134,6 +138,10 @@ class IoStatementState {
     } else {
       if (edit.width.value_or(0) > 0) {
         remaining = *edit.width;
+        if (int bytesPerChar{GetConnectionState().internalIoCharKind};
+            bytesPerChar > 1) {
+          *remaining *= bytesPerChar;
+        }
       }
       SkipSpaces(remaining);
     }


        


More information about the flang-commits mailing list