[flang-commits] [flang] 353d56d - [flang][runtime] Fix fixed-width field internal wide character input (#74683)
via flang-commits
flang-commits at lists.llvm.org
Mon Dec 11 13:05:34 PST 2023
Author: Peter Klausler
Date: 2023-12-11T13:05:29-08:00
New Revision: 353d56d22bfb77ee48554705c50ee71b115854fd
URL: https://github.com/llvm/llvm-project/commit/353d56d22bfb77ee48554705c50ee71b115854fd
DIFF: https://github.com/llvm/llvm-project/commit/353d56d22bfb77ee48554705c50ee71b115854fd.diff
LOG: [flang][runtime] Fix fixed-width field internal wide character input (#74683)
The runtime was inconsistent about units (bytes vs. characters) when
tracking the amount of input remaining in fixed-width formatted input
fields. Clarify that any variable or parameter counting "remaining"
space in a field in the I/O runtime is always in units of bytes, and
fix the places where that was not the case.
Fixes the bug(s) in
llvm-test-suite/Fortran/gfortran/regression/char4_iunit_2.f03, although
the test still won't pass due to its dependence on gfortran's
list-directed output spacing.
Added:
Modified:
flang/runtime/edit-input.cpp
flang/runtime/io-stmt.h
Removed:
################################################################################
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 7a868d56f075e..822099b5141b1 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -916,20 +916,20 @@ static bool EditListDirectedCharacterInput(
}
template <typename CHAR>
-bool EditCharacterInput(
- IoStatementState &io, const DataEdit &edit, CHAR *x, std::size_t length) {
+bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
+ std::size_t lengthChars) {
switch (edit.descriptor) {
case DataEdit::ListDirected:
- return EditListDirectedCharacterInput(io, x, length, edit);
+ return EditListDirectedCharacterInput(io, x, lengthChars, edit);
case 'A':
case 'G':
break;
case 'B':
- return EditBOZInput<1>(io, edit, x, length * sizeof *x);
+ return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
case 'O':
- return EditBOZInput<3>(io, edit, x, length * sizeof *x);
+ return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
case 'Z':
- return EditBOZInput<4>(io, edit, x, length * sizeof *x);
+ return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
default:
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
"Data edit descriptor '%c' may not be used with a CHARACTER data item",
@@ -937,27 +937,31 @@ bool EditCharacterInput(
return false;
}
const ConnectionState &connection{io.GetConnectionState()};
- std::size_t remaining{length};
+ std::size_t remainingChars{lengthChars};
+ // Skip leading characters.
+ // Their bytes don't count towards INQUIRE(IOLENGTH=).
+ std::size_t skipChars{0};
if (edit.width && *edit.width > 0) {
- remaining = *edit.width;
+ remainingChars = *edit.width;
+ if (remainingChars > lengthChars) {
+ skipChars = remainingChars - lengthChars;
+ }
}
// When the field is wider than the variable, we drop the leading
// characters. When the variable is wider than the field, there can be
// trailing padding or an EOR condition.
const char *input{nullptr};
- std::size_t ready{0};
- // Skip leading bytes.
- // These bytes don't count towards INQUIRE(IOLENGTH=).
- std::size_t skip{remaining > length ? remaining - length : 0};
+ std::size_t readyBytes{0};
// Transfer payload bytes; these do count.
- while (remaining > 0) {
- if (ready == 0) {
- ready = io.GetNextInputBytes(input);
- if (ready == 0 || (ready < remaining && edit.modes.nonAdvancing)) {
- if (io.CheckForEndOfRecord(ready)) {
- if (ready == 0) {
+ while (remainingChars > 0) {
+ if (readyBytes == 0) {
+ readyBytes = io.GetNextInputBytes(input);
+ if (readyBytes == 0 ||
+ (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
+ if (io.CheckForEndOfRecord(readyBytes)) {
+ if (readyBytes == 0) {
// PAD='YES' and no more data
- std::fill_n(x, length, ' ');
+ std::fill_n(x, lengthChars, ' ');
return !io.GetIoErrorHandler().InError();
} else {
// Do partial read(s) then pad on last iteration
@@ -967,63 +971,64 @@ bool EditCharacterInput(
}
}
}
- std::size_t chunk;
- bool skipping{skip > 0};
+ std::size_t chunkBytes;
+ std::size_t chunkChars{1};
+ bool skipping{skipChars > 0};
if (connection.isUTF8) {
- chunk = MeasureUTF8Bytes(*input);
+ chunkBytes = MeasureUTF8Bytes(*input);
if (skipping) {
- --skip;
+ --skipChars;
} else if (auto ucs{DecodeUTF8(input)}) {
*x++ = *ucs;
- --length;
- } else if (chunk == 0) {
+ --lengthChars;
+ } else if (chunkBytes == 0) {
// error recovery: skip bad encoding
- chunk = 1;
+ chunkBytes = 1;
}
- --remaining;
} else if (connection.internalIoCharKind > 1) {
// Reading from non-default character internal unit
- chunk = connection.internalIoCharKind;
+ chunkBytes = connection.internalIoCharKind;
if (skipping) {
- --skip;
+ --skipChars;
} else {
char32_t buffer{0};
- std::memcpy(&buffer, input, chunk);
+ std::memcpy(&buffer, input, chunkBytes);
*x++ = buffer;
- --length;
+ --lengthChars;
}
- --remaining;
} else if constexpr (sizeof *x > 1) {
// Read single byte with expansion into multi-byte CHARACTER
- chunk = 1;
+ chunkBytes = 1;
if (skipping) {
- --skip;
+ --skipChars;
} else {
*x++ = static_cast<unsigned char>(*input);
- --length;
+ --lengthChars;
}
- --remaining;
} else { // single bytes -> default CHARACTER
if (skipping) {
- chunk = std::min<std::size_t>(skip, ready);
- skip -= chunk;
+ chunkBytes = std::min<std::size_t>(skipChars, readyBytes);
+ chunkChars = chunkBytes;
+ skipChars -= chunkChars;
} else {
- chunk = std::min<std::size_t>(remaining, ready);
- std::memcpy(x, input, chunk);
- x += chunk;
- length -= chunk;
+ chunkBytes = std::min<std::size_t>(remainingChars, readyBytes);
+ chunkBytes = std::min<std::size_t>(lengthChars, chunkBytes);
+ chunkChars = chunkBytes;
+ std::memcpy(x, input, chunkBytes);
+ x += chunkBytes;
+ lengthChars -= chunkChars;
}
- remaining -= chunk;
}
- input += chunk;
+ input += chunkBytes;
+ remainingChars -= chunkChars;
if (!skipping) {
- io.GotChar(chunk);
+ io.GotChar(chunkBytes);
}
- io.HandleRelativePosition(chunk);
- ready -= chunk;
+ io.HandleRelativePosition(chunkBytes);
+ readyBytes -= chunkBytes;
}
// Pad the remainder of the input variable, if any.
- std::fill_n(x, length, ' ');
+ std::fill_n(x, lengthChars, ' ');
return CheckCompleteListDirectedField(io, edit);
}
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index d4ceb83265246..91169f6c6e323 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -92,8 +92,8 @@ class IoStatementState {
std::size_t GetNextInputBytes(const char *&);
bool AdvanceRecord(int = 1);
void BackspaceRecord();
- void HandleRelativePosition(std::int64_t);
- void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
+ void HandleRelativePosition(std::int64_t byteOffset);
+ void HandleAbsolutePosition(std::int64_t byteOffset); // for r* in list I/O
std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
bool BeginReadingRecord();
@@ -124,7 +124,11 @@ class IoStatementState {
// Vacant after the end of the current record
std::optional<char32_t> GetCurrentChar(std::size_t &byteCount);
- // For fixed-width fields, return the number of remaining characters.
+ // The "remaining" arguments to CueUpInput(), SkipSpaces(), & NextInField()
+ // are always in units of bytes, not characters; the distinction matters
+ // for internal input from CHARACTER(KIND=2 and 4).
+
+ // For fixed-width fields, return the number of remaining bytes.
// Skip over leading blanks.
std::optional<int> CueUpInput(const DataEdit &edit) {
std::optional<int> remaining;
@@ -134,6 +138,10 @@ class IoStatementState {
} else {
if (edit.width.value_or(0) > 0) {
remaining = *edit.width;
+ if (int bytesPerChar{GetConnectionState().internalIoCharKind};
+ bytesPerChar > 1) {
+ *remaining *= bytesPerChar;
+ }
}
SkipSpaces(remaining);
}
More information about the flang-commits mailing list