[flang-commits] [flang] 53f775b - [flang][runtime] Support B/O/Z editing of CHARACTER

Thu Apr 28 12:44:38 PDT 2022

Author: Peter Klausler
Date: 2022-04-28T12:44:31-07:00
New Revision: 53f775bbc0b8fbca941b5f7ad324b18884eddd7e

URL: https://github.com/llvm/llvm-project/commit/53f775bbc0b8fbca941b5f7ad324b18884eddd7e
DIFF: https://github.com/llvm/llvm-project/commit/53f775bbc0b8fbca941b5f7ad324b18884eddd7e.diff

LOG: [flang][runtime] Support B/O/Z editing of CHARACTER

This is a common extension, though semantics differ across
compilers.  I've chosen to interpret the CHARACTER data
as if it were an arbitrary-precision integer value and
format or read it as such.  This matches Intel's compilers
and nvfortran.  (GNU Fortran can't handle lengths > 1 and XLF
seems to get the endianness wrong.)

This patch generalizes the previous implementations of
B/O/Z input and output so that they'll work for arbitrary data
in memory, and then uses them for all B/O/Z input/output,
including (now) CHARACTER.

Differential Revision: https://reviews.llvm.org/D124547

Added: 
    

Modified: 
    flang/runtime/edit-input.cpp
    flang/runtime/edit-output.cpp

Removed: 
    


################################################################################
diff  --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 439f49b2fc96..304394577861 100644

--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -16,37 +16,76 @@
 
 namespace Fortran::runtime::io {
 
-static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
-    int base, int totalBitSize) {
+template <int LOG2_BASE>
+static bool EditBOZInput(
+    IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
   std::optional<int> remaining;
   std::optional<char32_t> next{io.PrepareInput(edit, remaining)};
-  common::UnsignedInt128 value{0};
+  if (*next == '0') {
+    do {
+      next = io.NextInField(remaining, edit);
+    } while (next && *next == '0');
+  }
+  // Count significant digits after any leading white space & zeroes
+  int digits{0};
   for (; next; next = io.NextInField(remaining, edit)) {
     char32_t ch{*next};
     if (ch == ' ' || ch == '\t') {
       continue;
     }
-    int digit{0};
     if (ch >= '0' && ch <= '1') {
-      digit = ch - '0';
-    } else if (base >= 8 && ch >= '2' && ch <= '7') {
-      digit = ch - '0';
-    } else if (base >= 10 && ch >= '8' && ch <= '9') {
-      digit = ch - '0';
-    } else if (base == 16 && ch >= 'A' && ch <= 'Z') {
-      digit = ch + 10 - 'A';
-    } else if (base == 16 && ch >= 'a' && ch <= 'z') {
-      digit = ch + 10 - 'a';
+    } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
+    } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
+    } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
+    } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
     } else {
       io.GetIoErrorHandler().SignalError(
           "Bad character '%lc' in B/O/Z input field", ch);
       return false;
     }
-    value *= base;
-    value += digit;
+    ++digits;
+  }
+  auto significantBytes{static_cast<std::size_t>(digits * LOG2_BASE + 7) / 8};
+  if (significantBytes > bytes) {
+    io.GetIoErrorHandler().SignalError(
+        "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
+    return false;
+  }
+  // Reset to start of significant digits
+  io.HandleRelativePosition(-digits);
+  remaining.reset();
+  // Make a second pass now that the digit count is known
+  std::memset(n, 0, bytes);
+  int increment{isHostLittleEndian ? -1 : 1};
+  auto *data{reinterpret_cast<unsigned char *>(n) +
+      (isHostLittleEndian ? significantBytes - 1 : 0)};
+  int shift{((digits - 1) * LOG2_BASE) & 7};
+  if (shift + LOG2_BASE > 8) {
+    shift -= 8; // misaligned octal
+  }
+  while (digits > 0) {
+    char32_t ch{*io.NextInField(remaining, edit)};
+    int digit{0};
+    if (ch >= '0' && ch <= '9') {
+      digit = ch - '0';
+    } else if (ch >= 'A' && ch <= 'F') {
+      digit = ch + 10 - 'A';
+    } else if (ch >= 'a' && ch <= 'f') {
+      digit = ch + 10 - 'a';
+    } else {
+      continue;
+    }
+    --digits;
+    if (shift < 0) {
+      shift += 8;
+      if (shift + LOG2_BASE > 8) { // misaligned octal
+        *data |= digit >> (8 - shift);
+      }
+      data += increment;
+    }
+    *data |= digit << shift;
+    shift -= LOG2_BASE;
   }
-  // TODO: check for overflow
-  std::memcpy(n, &value, totalBitSize >> 3);
   return true;
 }
 
@@ -83,11 +122,11 @@ bool EditIntegerInput(
   case 'I':
     break;
   case 'B':
-    return EditBOZInput(io, edit, n, 2, kind << 3);
+    return EditBOZInput<1>(io, edit, n, kind);
   case 'O':
-    return EditBOZInput(io, edit, n, 8, kind << 3);
+    return EditBOZInput<3>(io, edit, n, kind);
   case 'Z':
-    return EditBOZInput(io, edit, n, 16, kind << 3);
+    return EditBOZInput<4>(io, edit, n, kind);
   case 'A': // legacy extension
     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
   default:
@@ -457,7 +496,6 @@ bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
 
 template <int KIND>
 bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
-  constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
   switch (edit.descriptor) {
   case DataEdit::ListDirected:
     if (IsNamelistName(io)) {
@@ -472,14 +510,14 @@ bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
   case 'G':
     return EditCommonRealInput<KIND>(io, edit, n);
   case 'B':
-    return EditBOZInput(
-        io, edit, n, 2, common::BitsForBinaryPrecision(binaryPrecision));
+    return EditBOZInput<1>(io, edit, n,
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'O':
-    return EditBOZInput(
-        io, edit, n, 8, common::BitsForBinaryPrecision(binaryPrecision));
+    return EditBOZInput<3>(io, edit, n,
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'Z':
-    return EditBOZInput(
-        io, edit, n, 16, common::BitsForBinaryPrecision(binaryPrecision));
+    return EditBOZInput<4>(io, edit, n,
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'A': // legacy extension
     return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
   default:
@@ -590,7 +628,7 @@ static bool EditListDirectedCharacterInput(
   // or the end of the current record.  Subtlety: the "remaining" count
   // here is a dummy that's used to avoid the interpretation of separators
   // in NextInField.
-  std::optional<int> remaining{maxUTF8Bytes};
+  std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
   while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
     switch (*next) {
     case ' ':
@@ -602,8 +640,7 @@ static bool EditListDirectedCharacterInput(
       break;
     default:
       *x++ = *next;
-      --length;
-      remaining = maxUTF8Bytes;
+      remaining = --length > 0 ? maxUTF8Bytes : 0;
     }
   }
   std::fill_n(x, length, ' ');
@@ -619,6 +656,12 @@ bool EditCharacterInput(
   case 'A':
   case 'G':
     break;
+  case 'B':
+    return EditBOZInput<1>(io, edit, x, length * sizeof *x);
+  case 'O':
+    return EditBOZInput<3>(io, edit, x, length * sizeof *x);
+  case 'Z':
+    return EditBOZInput<4>(io, edit, x, length * sizeof *x);
   default:
     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
         "Data edit descriptor '%c' may not be used with a CHARACTER data item",

diff  --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp
index ebdc62b78bb6..842ee837d69e 100644
--- a/flang/runtime/edit-output.cpp
+++ b/flang/runtime/edit-output.cpp
@@ -13,6 +13,85 @@
 
 namespace Fortran::runtime::io {
 
+// B/O/Z output of arbitrarily sized data emits a binary/octal/hexadecimal
+// representation of what is interpreted to be a single unsigned integer value.
+// When used with character data, endianness is exposed.
+template <int LOG2_BASE>
+static bool EditBOZOutput(IoStatementState &io, const DataEdit &edit,
+    const unsigned char *data0, std::size_t bytes) {
+  int digits{static_cast<int>((bytes * 8) / LOG2_BASE)};
+  int get{static_cast<int>(bytes * 8) - digits * LOG2_BASE};
+  get = get ? get : LOG2_BASE;
+  int shift{7};
+  int increment{isHostLittleEndian ? -1 : 1};
+  const unsigned char *data{data0 + (isHostLittleEndian ? bytes - 1 : 0)};
+  int skippedZeroes{0};
+  int digit{0};
+  // The same algorithm is used to generate digits for real (below)
+  // as well as for generating them only to skip leading zeroes (here).
+  // Bits are copied one at a time from the source data.
+  // TODO: Multiple bit copies for hexadecimal, where misalignment
+  // is not possible; or for octal when all 3 bits come from the
+  // same byte.
+  while (bytes > 0) {
+    if (get == 0) {
+      if (digit != 0) {
+        break; // first nonzero leading digit
+      }
+      ++skippedZeroes;
+      get = LOG2_BASE;
+    } else if (shift < 0) {
+      data += increment;
+      --bytes;
+      shift = 7;
+    } else {
+      digit = 2 * digit + ((*data >> shift--) & 1);
+      --get;
+    }
+  }
+  // Emit leading spaces and zeroes; detect field overflow
+  int leadingZeroes{0};
+  int editWidth{edit.width.value_or(0)};
+  int significant{digits - skippedZeroes};
+  if (edit.digits && significant <= *edit.digits) { // Bw.m, Ow.m, Zw.m
+    if (*edit.digits == 0 && bytes == 0) {
+      editWidth = std::max(1, editWidth);
+    } else {
+      leadingZeroes = *edit.digits - significant;
+    }
+  } else if (bytes == 0) {
+    leadingZeroes = 1;
+  }
+  int subTotal{leadingZeroes + significant};
+  int leadingSpaces{std::max(0, editWidth - subTotal)};
+  if (editWidth > 0 && leadingSpaces + subTotal > editWidth) {
+    return io.EmitRepeated('*', editWidth);
+  }
+  if (!(io.EmitRepeated(' ', leadingSpaces) &&
+          io.EmitRepeated('0', leadingZeroes))) {
+    return false;
+  }
+  // Emit remaining digits
+  while (bytes > 0) {
+    if (get == 0) {
+      char ch{static_cast<char>(digit >= 10 ? 'A' + digit - 10 : '0' + digit)};
+      if (!io.Emit(&ch, 1)) {
+        return false;
+      }
+      get = LOG2_BASE;
+      digit = 0;
+    } else if (shift < 0) {
+      data += increment;
+      --bytes;
+      shift = 7;
+    } else {
+      digit = 2 * digit + ((*data >> shift--) & 1);
+      --get;
+    }
+  }
+  return true;
+}
+
 template <int KIND>
 bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
     common::HostSignedIntType<8 * KIND> n) {
@@ -38,21 +117,14 @@ bool EditIntegerOutput(IoStatementState &io, const DataEdit &edit,
     }
     break;
   case 'B':
-    for (; un > 0; un >>= 1) {
-      *--p = '0' + (static_cast<int>(un) & 1);
-    }
-    break;
+    return EditBOZOutput<1>(
+        io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
   case 'O':
-    for (; un > 0; un >>= 3) {
-      *--p = '0' + (static_cast<int>(un) & 7);
-    }
-    break;
+    return EditBOZOutput<3>(
+        io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
   case 'Z':
-    for (; un > 0; un >>= 4) {
-      int digit = static_cast<int>(un) & 0xf;
-      *--p = digit >= 10 ? 'A' + (digit - 10) : '0' + digit;
-    }
-    break;
+    return EditBOZOutput<4>(
+        io, edit, reinterpret_cast<const unsigned char *>(&n), KIND);
   case 'A': // legacy extension
     return EditCharacterOutput(
         io, edit, reinterpret_cast<char *>(&n), sizeof n);
@@ -442,11 +514,17 @@ template <int KIND> bool RealOutputEditing<KIND>::Edit(const DataEdit &edit) {
   case 'F':
     return EditFOutput(edit);
   case 'B':
+    return EditBOZOutput<1>(io_, edit,
+        reinterpret_cast<const unsigned char *>(&x_),
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'O':
+    return EditBOZOutput<3>(io_, edit,
+        reinterpret_cast<const unsigned char *>(&x_),
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'Z':
-    return EditIntegerOutput<KIND>(io_, edit,
-        static_cast<common::HostSignedIntType<8 * KIND>>(
-            decimal::BinaryFloatingPointNumber<binaryPrecision>{x_}.raw()));
+    return EditBOZOutput<4>(io_, edit,
+        reinterpret_cast<const unsigned char *>(&x_),
+        common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
   case 'G':
     return Edit(EditForGOutput(edit));
   case 'A': // legacy extension
@@ -475,6 +553,15 @@ bool EditLogicalOutput(IoStatementState &io, const DataEdit &edit, bool truth) {
   case 'G':
     return io.EmitRepeated(' ', std::max(0, edit.width.value_or(1) - 1)) &&
         io.Emit(truth ? "T" : "F", 1);
+  case 'B':
+    return EditBOZOutput<1>(io, edit,
+        reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
+  case 'O':
+    return EditBOZOutput<3>(io, edit,
+        reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
+  case 'Z':
+    return EditBOZOutput<4>(io, edit,
+        reinterpret_cast<const unsigned char *>(&truth), sizeof truth);
   default:
     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
         "Data edit descriptor '%c' may not be used with a LOGICAL data item",
@@ -544,6 +631,15 @@ bool EditCharacterOutput(IoStatementState &io, const DataEdit &edit,
   case 'A':
   case 'G':
     break;
+  case 'B':
+    return EditBOZOutput<1>(io, edit,
+        reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
+  case 'O':
+    return EditBOZOutput<3>(io, edit,
+        reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
+  case 'Z':
+    return EditBOZOutput<4>(io, edit,
+        reinterpret_cast<const unsigned char *>(x), sizeof(CHAR) * length);
   default:
     io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
         "Data edit descriptor '%c' may not be used with a CHARACTER data item",