[flang-commits] [flang] [llvm] [flang][runtime] Formatted input optimizations (PR #134715)

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Tue Apr 8 09:30:43 PDT 2025


================
@@ -130,20 +130,95 @@ class IoStatementState {
   }
 
   // Vacant after the end of the current record
-  RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
+  RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentCharSlow(
       std::size_t &byteCount);
 
+  // For faster formatted input editing, this structure can be built by
+  // GetUpcomingFastAsciiField() and used to save significant time in
+  // GetCurrentChar, NextInField() and other input utilities when the input
+  // is buffered, does not require UTF-8 conversion, and comprises only
+  // single byte characters.
+  class FastAsciiField {
+  public:
+    RT_API_ATTRS FastAsciiField(ConnectionState &connection)
+        : connection_{connection} {}
+    RT_API_ATTRS FastAsciiField(
+        ConnectionState &connection, const char *start, std::size_t bytes)
+        : connection_{connection}, at_{start}, limit_{start + bytes} {
+      CheckForAsterisk();
+    }
+    RT_API_ATTRS ConnectionState &connection() { return connection_; }
+    RT_API_ATTRS std::size_t got() const { return got_; }
+
+    RT_API_ATTRS bool IsActive() const { return at_ != nullptr; }
+
+    RT_API_ATTRS Fortran::common::optional<char32_t> Next() const {
+      if (at_ && at_ < limit_) {
+        return *at_;
+      } else {
+        return std::nullopt;
+      }
+    }
+    RT_API_ATTRS void NextRecord(IoStatementState &io) {
+      if (at_) {
+        if (std::size_t bytes{io.GetNextInputBytes(at_)}) {
+          limit_ = at_ + bytes;
+          CheckForAsterisk();
+        } else {
+          at_ = limit_ = nullptr;
+        }
+      }
+    }
+    RT_API_ATTRS void Advance(int gotten, std::size_t bytes) {
+      if (at_ && at_ < limit_) {
+        ++at_;
+        got_ += gotten;
+      }
+      connection_.HandleRelativePosition(bytes);
+    }
+    RT_API_ATTRS void SkipRestOfRecord() { at_ = limit_; }
+    RT_API_ATTRS bool MightHaveAsterisk() const { return !at_ || hasAsterisk_; }
+
+  private:
+    RT_API_ATTRS void CheckForAsterisk() {
+      hasAsterisk_ =
+          at_ && at_ < limit_ && std::memchr(at_, '*', limit_ - at_) != nullptr;
+    }
+
+    ConnectionState &connection_;
+    const char *at_{nullptr};
+    const char *limit_{nullptr};
+    std::size_t got_{0}; // for READ(..., SIZE=)
+    bool hasAsterisk_{false};
+  };
+
+  RT_API_ATTRS FastAsciiField GetUpcomingFastAsciiField();
+
+  RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
+      std::size_t &byteCount, FastAsciiField *field = nullptr) {
+    if (field) {
+      if (auto ch{field->Next()}) {
+        byteCount = ch ? 1 : 0;
+        return ch;
+      } else if (field->IsActive()) {
+        return std::nullopt;
----------------
klausler wrote:

There's a distinction between a FastAsciiField that has input (it's Active) and has reached its end, and one that never had input due to the unit being UTF-8 or internal I/O on a character with kind > 1.  In the former case, we've reached the end of a valid record and should stop fetching characters from it; in the latter, we need to go through the slow path.

https://github.com/llvm/llvm-project/pull/134715


More information about the flang-commits mailing list