[flang-commits] [flang] [llvm] [flang][runtime] Formatted input optimizations (PR #134715)
Peter Klausler via flang-commits
flang-commits at lists.llvm.org
Tue Apr 8 09:30:43 PDT 2025
================
@@ -130,20 +130,95 @@ class IoStatementState {
}
// Vacant after the end of the current record
- RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
+ RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentCharSlow(
std::size_t &byteCount);
+ // For faster formatted input editing, this structure can be built by
+ // GetUpcomingFastAsciiField() and used to save significant time in
+ // GetCurrentChar, NextInField() and other input utilities when the input
+ // is buffered, does not require UTF-8 conversion, and comprises only
+ // single byte characters.
+ class FastAsciiField { // cursor over the byte range [at_, limit_) plus the connection it belongs to
+ public:
+ RT_API_ATTRS FastAsciiField(ConnectionState &connection)
+ : connection_{connection} {} // inactive field: at_ and limit_ keep their nullptr defaults
+ RT_API_ATTRS FastAsciiField(
+ ConnectionState &connection, const char *start, std::size_t bytes)
+ : connection_{connection}, at_{start}, limit_{start + bytes} { // field covering [start, start + bytes)
+ CheckForAsterisk(); // cache whether the range contains '*'
+ }
+ RT_API_ATTRS ConnectionState &connection() { return connection_; } // the connection passed to the constructor
+ RT_API_ATTRS std::size_t got() const { return got_; } // running total of the `gotten` values added by Advance()
+
+ RT_API_ATTRS bool IsActive() const { return at_ != nullptr; } // true only while the cursor is non-null
+
+ RT_API_ATTRS Fortran::common::optional<char32_t> Next() const { // peeks at the current byte; does not move the cursor
+ if (at_ && at_ < limit_) {
+ return *at_; // NOTE(review): plain char widened to char32_t; presumably always 7-bit here — confirm
+ } else {
+ return std::nullopt; // inactive field, or cursor already at limit_
+ }
+ }
+ RT_API_ATTRS void NextRecord(IoStatementState &io) { // refills an active field; does nothing to an inactive one
+ if (at_) {
+ if (std::size_t bytes{io.GetNextInputBytes(at_)}) { // presumably repositions at_ (by-reference argument?) — confirm
+ limit_ = at_ + bytes;
+ CheckForAsterisk(); // refresh the cached '*' flag for the new range
+ } else {
+ at_ = limit_ = nullptr; // no bytes returned: the field becomes inactive
+ }
+ }
+ }
+ RT_API_ATTRS void Advance(int gotten, std::size_t bytes) { // consumes at most one byte of the field
+ if (at_ && at_ < limit_) {
+ ++at_;
+ got_ += gotten; // counted only when the cursor actually moved
+ }
+ connection_.HandleRelativePosition(bytes); // applied unconditionally, even if the cursor did not move
+ }
+ RT_API_ATTRS void SkipRestOfRecord() { at_ = limit_; } // afterwards Next() yields nullopt; inactive fields stay null
+ RT_API_ATTRS bool MightHaveAsterisk() const { return !at_ || hasAsterisk_; } // conservative: always true when inactive
+
+ private:
+ RT_API_ATTRS void CheckForAsterisk() { // recomputes hasAsterisk_ from the bytes remaining in [at_, limit_)
+ hasAsterisk_ =
+ at_ && at_ < limit_ && std::memchr(at_, '*', limit_ - at_) != nullptr; // false for an inactive or exhausted range
+ }
+
+ ConnectionState &connection_; // bound at construction
+ const char *at_{nullptr}; // current position; nullptr means the field is inactive
+ const char *limit_{nullptr}; // one past the last byte of the current range
+ std::size_t got_{0}; // for READ(..., SIZE=)
+ bool hasAsterisk_{false}; // result of the most recent CheckForAsterisk()
+ };
+
+ RT_API_ATTRS FastAsciiField GetUpcomingFastAsciiField();
+
+ RT_API_ATTRS Fortran::common::optional<char32_t> GetCurrentChar(
+ std::size_t &byteCount, FastAsciiField *field = nullptr) {
+ if (field) {
+ if (auto ch{field->Next()}) {
+ byteCount = ch ? 1 : 0;
+ return ch;
+ } else if (field->IsActive()) {
+ return std::nullopt;
----------------
klausler wrote:
There's a distinction between a FastAsciiField that has input (it is active, i.e. IsActive() is true) and has reached its end, and one that never had input because the unit is UTF-8 or the statement is internal I/O on a character variable of kind > 1. In the former case, we've reached the end of a valid record and should stop fetching characters from it; in the latter, we need to go through the slow path.
https://github.com/llvm/llvm-project/pull/134715
More information about the flang-commits mailing list