[flang-commits] [flang] 53f775b - [flang][runtime] Support B/O/Z editing of CHARACTER
Peter Klausler via flang-commits
flang-commits at lists.llvm.org
Thu Apr 28 12:44:38 PDT 2022
Author: Peter Klausler
Date: 2022-04-28T12:44:31-07:00
New Revision: 53f775bbc0b8fbca941b5f7ad324b18884eddd7e
URL: https://github.com/llvm/llvm-project/commit/53f775bbc0b8fbca941b5f7ad324b18884eddd7e
DIFF: https://github.com/llvm/llvm-project/commit/53f775bbc0b8fbca941b5f7ad324b18884eddd7e.diff
LOG: [flang][runtime] Support B/O/Z editing of CHARACTER
This is a common extension, though semantics differ across
compilers. I've chosen to interpret the CHARACTER data
as if it were an arbitrary-precision integer value and
format or read it as such. This matches Intel's compilers
and nvfortran. (GNU Fortran can't handle lengths > 1 and XLF
seems to get the endianness wrong.)
This patch generalizes the previous implementations of
B/O/Z input and output so that they'll work for arbitrary data
in memory, and then uses them for all B/O/Z input/output,
including (now) CHARACTER.
Differential Revision: https://reviews.llvm.org/D124547
Added:
Modified:
flang/runtime/edit-input.cpp
flang/runtime/edit-output.cpp
Removed:
################################################################################
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 439f49b2fc96..304394577861 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -16,37 +16,76 @@
namespace Fortran::runtime::io {
-static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
- int base, int totalBitSize) {
+template <int LOG2_BASE>
+static bool EditBOZInput(
+ IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
std::optional<int> remaining;
std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
- common::UnsignedInt128 value{0};
+ if (*next == '0') {
+ do {
+ next = io.NextInField(remaining, edit);
+ } while (next && *next == '0');
+ }
+ // Count significant digits after any leading white space & zeroes
+ int digits{0};
for (; next; next = io.NextInField(remaining, edit)) {
char32_t ch{*next};
if (ch == ' ' || ch == '\t') {
continue;
}
- int digit{0};
if (ch >= '0' && ch <= '1') {
- digit = ch - '0';
- } else if (base >= 8 && ch >= '2' && ch <= '7') {
- digit = ch - '0';
- } else if (base >= 10 && ch >= '8' && ch <= '9') {
- digit = ch - '0';
- } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
- digit = ch + 10 - 'A';
- } else if (base == 16 && ch >= 'a' && ch <= 'z') {
- digit = ch + 10 - 'a';
+ } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
+ } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
+ } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
+ } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
} else {
io.GetIoErrorHandler().SignalError(
"Bad character '%lc' in B/O/Z input field", ch);
return false;
}
- value *= base;
- value += digit;
+ ++digits;
+ }
+ auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
+ if (significantBytes > bytes) {
+ io.GetIoErrorHandler().SignalError(
+ "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
+ return false;
+ }
+ // Reset to start of significant digits
+ io.HandleRelativePosition(-digits);
+ remaining.reset();
+ // Make a second pass now that the digit count is known
+ std::memset(n, 0, bytes);
+ int increment{isHostLittleEndian ? -1 : 1};
+ auto *data{reinterpret_cast<unsigned char *>(n) +
+ (isHostLittleEndian ? significantBytes - 1 : 0)};
+ int shift{((digits - 1) * LOG2_BASE) & 7};
+ if (shift + LOG2_BASE > 8) {
+ shift -= 8; // misaligned octal
+ }
+ while (digits > 0) {
+ char32_t ch{*io.NextInField(remaining, edit)};
+ int digit{0};
+ if (ch >= '0' && ch <= '9') {
+ digit = ch - '0';
+ } else if (ch >= 'A' && ch <= 'F') {
+ digit = ch + 10 - 'A';
+ } else if (ch >= 'a' && ch <= 'f') {
+ digit = ch + 10 - 'a';
+ } else {
+ continue;
+ }
+ --digits;
+ if (shift < 0) {
+ shift += 8;
+ if (shift + LOG2_BASE > 8) { // misaligned octal
+ *data |= digit >> (8 - shift);
+ }
+ data += increment;
+ }
+ *data |= digit << shift;
+ shift -= LOG2_BASE;
}
- // TODO: check for overflow
- std::memcpy(n, &value, totalBitSize >> 3);
return true;
}
@@ -83,11 +122,11 @@ bool EditIntegerInput(
case 'I':
break;
case 'B':
- return EditBOZInput(io, edit, n, 2, kind << 3);
+ return EditBOZInput<1>(io, edit, n, kind);
case 'O':
- return EditBOZInput(io, edit, n, 8, kind << 3);
+ return EditBOZInput<3>(io, edit, n, kind);
case 'Z':
- return EditBOZInput(io, edit, n, 16, kind << 3);
+ return EditBOZInput<4>(io, edit, n, kind);
case 'A': // legacy extension
return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
default:
@@ -457,7 +496,6 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
template <int KIND>
bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
- constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
switch (edit.descriptor) {
case DataEdit::ListDirected:
if (IsNamelistName(io)) {
@@ -472,14 +510,14 @@ bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
case 'G':
return EditCommonRealInput<KIND>(io, edit, n);
case 'B':
- return EditBOZInput(
- io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
+ return EditBOZInput<1>(io, edit, n,
+ common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
case 'O':
- return EditBOZInput(
- io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
+ return EditBOZInput<3>(io, edit, n,
+ common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
case 'Z':
- return EditBOZInput(
- io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
+ return EditBOZInput<4>(io, edit, n,
+ common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
case 'A': // legacy extension
return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
default:
@@ -590,7 +628,7 @@ static bool EditListDirectedCharacterInput(
// or the end of the current record. Subtlety: the "remaining" count
// here is a dummy that's used to avoid the interpretation of separators
// in NextInField.
- std::optional<int> remaining{maxUTF8Bytes};
+ std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
switch (*next) {
case ' ':
@@ -602,8 +640,7 @@ static bool EditListDirectedCharacterInput(
break;
default:
*x++ = *next;
- --length;
- remaining = maxUTF8Bytes;
+ remaining = --length > 0 ? maxUTF8Bytes : 0;
}
}
std::fill_n(x, length, ' ');
@@ -619,6 +656,12 @@ bool EditCharacterInput(
case 'A':
case 'G':
break;
+ case 'B':
+ return EditBOZInput<1>(io, edit, x, length * sizeof *x);
+ case 'O':
+ return EditBOZInput<3>(io, edit, x, length * sizeof *x);
+ case 'Z':
+ return EditBOZInput<4>(io, edit, x, length * sizeof *x);
default:
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
"Data edit descriptor '%c' may not be used with a CHARACTER data item",
diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp
index ebdc62b78bb6..842ee837d69e 100644
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -13,6 +13,85 @@
namespace Fortran::runtime::io {
+// B/O/Z output of arbitrarily sized data emits a binary/octal/hexadecimal
+// representation of what is interpreted to be a single unsigned integer value.
+// When used with character data, endianness is exposed.
+template <int LOG2_BASE>
+static bool EditBOZOutput(IoStatementState &io, const DataEdit &edit,
+ const unsigned char *data0, std::size_t bytes) {
+ int digits{static_cast<int>((bytes * 8) / LOG2_BASE)};
+ int get{static_cast<int>(bytes * 8) - digits * LOG2_BASE};
+ get = get ? get : LOG2_BASE;
+ int shift{7};
+ int increment{isHostLittleEndian ? -1 : 1};
+ const unsigned char *data{data0 + (isHostLittleEndian ? bytes - 1 : 0)};
+ int skippedZeroes{0};
+ int digit{0};
+ // The same algorithm is used to generate digits for real (below)
+ // as well as for generating them only to skip leading zeroes (here).
+ // Bits are copied one at a time from the source data.
+ // TODO: Multiple bit copies for hexadecimal, where misalignment
+ // is not possible; or for octal when all 3 bits come from the
+ // same byte.
+ while (bytes > 0) {
+ if (get == 0) {
+ if (digit != 0) {
+ break; // first nonzero leading digit
+ }
+ ++skippedZeroes;
+ get = LOG2_BASE;
+ } else if (shift < 0) {
+ data += increment;
+ --bytes;
+ shift = 7;
+ } else {
+ digit = 2 * digit + ((*data >> shift--) & 1);
+ --get;
+ }
+ }
+ // Emit leading spaces and zeroes; detect field overflow
+ int leadingZeroes{0};
+ int editWidth{edit.width.value_or(0)};
+ int significant{digits - skippedZeroes};
+ if (edit.digits && significant <= *edit.digits) { // Bw.m, Ow.m, Zw.m
+ if (*edit.digits == 0 && bytes == 0) {
+ editWidth = std::max(1, editWidth);
+ } else {
+ leadingZeroes = *edit.digits - significant;
+ }
+ } else if (bytes == 0) {
+ leadingZeroes = 1;
+ }
+ int subTotal{leadingZeroes + significant};
+ int leadingSpaces{std::max(0, editWidth - subTotal)};
+ if (editWidth > 0 && leadingSpaces + subTotal > editWidth) {
+ return io.EmitRepeated('*', editWidth);
+ }
+ if (!(io.EmitRepeated(' ', leadingSpaces) &&
+ io.EmitRepeated('0', leadingZeroes))) {
+ return false;
+ }
+ // Emit remaining digits
+ while (bytes > 0) {
+ if (get == 0) {
+ char ch{static_cast<char>(digit >= 10 ? 'A' + digit - 10 : '0' + digit)};
+ if (!io.Emit(&ch, 1)) {
+ return false;
+ }
+ get = LOG2_BASE;
+ digit = 0;
+ } else if (shift < 0) {
+ data += increment;
+ --bytes;
+ shift = 7;
+ } else {
+ digit = 2 * digit + ((*data >> shift--) & 1);
+ --get;
+ }
+ }
+ return true;
+}
+
template <int KIND>
bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
common::HostSignedIntType<8 * KIND> n) {
@@ -38,21 +117,14 @@ bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
}
break;
case 'B':
- for (; un > 0; un >>= 1) {
- *--p = '0' + (static_cast<int>(un) & 1);
- }
- break;
+ return EditBOZOutput<1>(
+ io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
case 'O':
- for (; un > 0; un >>= 3) {
- *--p = '0' + (static_cast<int>(un) & 7);
- }
- break;
+ return EditBOZOutput<3>(
+ io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
case 'Z':
- for (; un > 0; un >>= 4) {
- int digit = static_cast<int>(un) & 0xf;
- *--p = digit >= 10 ? 'A' + (digit - 10) : '0' + digit;
- }
- break;
+ return EditBOZOutput<4>(
+ io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
case 'A': // legacy extension
return EditCharacterOutput(
io, edit, reinterpret_cast<char *>(&n), sizeof n);
@@ -442,11 +514,17 @@ template <int KIND> bool RealOutputEditing<KIND>::Edit(const DataEdit &edit) {
case 'F':
return EditFOutput(edit);
case 'B':
+ return EditBOZOutput<1>(io_, edit,
+ reinterpret_cast<const unsigned char *>(&x_),
+ common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
case 'O':
+ return EditBOZOutput<3>(io_, edit,
+ reinterpret_cast<const unsigned char *>(&x_),
+ common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
case 'Z':
- return EditIntegerOutput<KIND>(io_, edit,
- static_cast<common::HostSignedIntType<8 * KIND>>(
- decimal::BinaryFloatingPointNumber<binaryPrecision>{x_}.raw()));
+ return EditBOZOutput<4>(io_, edit,
+ reinterpret_cast<const unsigned char *>(&x_),
+ common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
case 'G':
return Edit(EditForGOutput(edit));
case 'A': // legacy extension
@@ -475,6 +553,15 @@ bool EditLogicalOutput(IoStatementState &io, const DataEdit &edit, bool truth) {
case 'G':
return io.EmitRepeated(' ', std::max(0, edit.width.value_or(1) - 1)) &&
io.Emit(truth ? "T" : "F", 1);
+ case 'B':
+ return EditBOZOutput<1>(io, edit,
+ reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
+ case 'O':
+ return EditBOZOutput<3>(io, edit,
+ reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
+ case 'Z':
+ return EditBOZOutput<4>(io, edit,
+ reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
default:
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
"Data edit descriptor '%c' may not be used with a LOGICAL data item",
@@ -544,6 +631,15 @@ bool EditCharacterOutput(IoStatementState &io, const DataEdit &edit,
case 'A':
case 'G':
break;
+ case 'B':
+ return EditBOZOutput<1>(io, edit,
+ reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
+ case 'O':
+ return EditBOZOutput<3>(io, edit,
+ reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
+ case 'Z':
+ return EditBOZOutput<4>(io, edit,
+ reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
default:
io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
"Data edit descriptor '%c' may not be used with a CHARACTER data item",
More information about the flang-commits
mailing list