[libcxx-commits] [libcxx] [libc++] Optimize std::getline (PR #121346)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Wed Feb 5 07:07:46 PST 2025


https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/121346

>From 310f910e763d18d8fa939a8f588d00d8839ece7d Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Mon, 30 Dec 2024 16:57:54 +0100
Subject: [PATCH] [libc++] Optimize std::getline

```
-----------------------------------------------
Benchmark                   old             new
-----------------------------------------------
BM_getline_string        318 ns         32.4 ns
```
---
 libcxx/docs/ReleaseNotes/20.rst               |   2 +
 libcxx/include/istream                        |  77 ++++++----
 libcxx/include/streambuf                      |   7 +
 .../test/benchmarks/streams/getline.bench.cpp |  35 +++++
 .../string.io/get_line.pass.cpp               | 137 ++++++++----------
 libcxx/test/support/stream_types.h            |  44 ++++++
 6 files changed, 196 insertions(+), 106 deletions(-)
 create mode 100644 libcxx/test/benchmarks/streams/getline.bench.cpp
 create mode 100644 libcxx/test/support/stream_types.h

diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index 57ab0c167544bc..3dc11c3b04f980 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -120,6 +120,8 @@ Improvements and New Features
 
 - Added :ref:`hardening mode <hardening>` support for ``forward_list`` and ``bitset``.
 
+- The performance of ``std::getline`` has been improved, resulting in a performance uplift of up to 10x.
+
 Deprecations and Removals
 -------------------------
 
diff --git a/libcxx/include/istream b/libcxx/include/istream
index 4b177c41cc325e..a9fd81d34786b3 100644
--- a/libcxx/include/istream
+++ b/libcxx/include/istream
@@ -1265,41 +1265,66 @@ _LIBCPP_HIDE_FROM_ABI basic_istream<_CharT, _Traits>&
 getline(basic_istream<_CharT, _Traits>& __is, basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm) {
   ios_base::iostate __state = ios_base::goodbit;
   typename basic_istream<_CharT, _Traits>::sentry __sen(__is, true);
-  if (__sen) {
+  if (!__sen)
+    return __is;
 #    if _LIBCPP_HAS_EXCEPTIONS
-    try {
+  try {
 #    endif
-      __str.clear();
-      streamsize __extr = 0;
-      while (true) {
-        typename _Traits::int_type __i = __is.rdbuf()->sbumpc();
-        if (_Traits::eq_int_type(__i, _Traits::eof())) {
-          __state |= ios_base::eofbit;
-          break;
+    __str.clear();
+
+    auto& __buffer = *__is.rdbuf();
+
+    auto __next = __buffer.sgetc();
+    for (; !_Traits::eq_int_type(__next, _Traits::eof()); __next = __buffer.sgetc()) {
+      const auto* __first = __buffer.gptr();
+      const auto* __last  = __buffer.egptr();
+      _CharT __1buf;
+
+      if (__first == __last) {
+        __1buf = __next;
+        __first = &__1buf;
+        __last = &__1buf + 1;
+      }
+
+      auto __bump_stream = [&](ptrdiff_t __diff) {
+        if (__first == &__1buf) {
+          _LIBCPP_ASSERT_INTERNAL(__diff == 1, "trying to bump stream further than buffer size");
+          __buffer.sbumpc();
+        } else {
+          __buffer.__gbump_ptrdiff(__diff);
         }
-        ++__extr;
-        _CharT __ch = _Traits::to_char_type(__i);
-        if (_Traits::eq(__ch, __dlm))
-          break;
-        __str.push_back(__ch);
-        if (__str.size() == __str.max_size()) {
+      };
+
+      const auto* __match = _Traits::find(__first, __last - __first, __dlm);
+      if (__match) {
+        if (auto __cap = __str.max_size() - __str.size(); __cap > static_cast<size_t>(__match - __first)) {
+          __str.append(__first, __match);
+          __bump_stream(__match - __first + 1);
+        } else {
+          __str.append(__first, __cap);
+          __bump_stream(__cap);
           __state |= ios_base::failbit;
-          break;
         }
+        break;
       }
-      if (__extr == 0)
-        __state |= ios_base::failbit;
+
+      __str.append(__first, __last);
+      __bump_stream(__last - __first);
+    }
+
+    if (_Traits::eq_int_type(__next, _Traits::eof()))
+      __state |= ios_base::eofbit | (__str.empty() ? ios_base::failbit : ios_base::goodbit);
+
 #    if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      __state |= ios_base::badbit;
-      __is.__setstate_nothrow(__state);
-      if (__is.exceptions() & ios_base::badbit) {
-        throw;
-      }
+  } catch (...) {
+    __state |= ios_base::badbit;
+    __is.__setstate_nothrow(__state);
+    if (__is.exceptions() & ios_base::badbit) {
+      throw;
     }
-#    endif
-    __is.setstate(__state);
   }
+#    endif
+  __is.setstate(__state);
   return __is;
 }
 
diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf
index 3c4e9086e05ecb..9358c40d00a99b 100644
--- a/libcxx/include/streambuf
+++ b/libcxx/include/streambuf
@@ -267,6 +267,9 @@ protected:
 
   inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; }
 
+  // gbump takes an int, so it might not be able to represent the offset we want to add.
+  _LIBCPP_HIDE_FROM_ABI void __gbump_ptrdiff(ptrdiff_t __n) { __ninp_ += __n; }
+
   inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range");
     _LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), "[gbeg, gend) must be a valid range");
@@ -370,6 +373,10 @@ private:
   char_type* __bout_ = nullptr;
   char_type* __nout_ = nullptr;
   char_type* __eout_ = nullptr;
+
+  template <class _CharT2, class _Traits2, class _Allocator>
+  _LIBCPP_HIDE_FROM_ABI friend basic_istream<_CharT2, _Traits2>&
+  getline(basic_istream<_CharT2, _Traits2>&, basic_string<_CharT2, _Traits2, _Allocator>&, _CharT2);
 };
 
 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_streambuf<char>;
diff --git a/libcxx/test/benchmarks/streams/getline.bench.cpp b/libcxx/test/benchmarks/streams/getline.bench.cpp
new file mode 100644
index 00000000000000..6a2215fe061167
--- /dev/null
+++ b/libcxx/test/benchmarks/streams/getline.bench.cpp
@@ -0,0 +1,35 @@
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+#include <istream>
+#include <sstream>
+
+#include <benchmark/benchmark.h>
+
+void BM_getline_string(benchmark::State& state) {
+  std::istringstream iss;
+
+  std::string str;
+  str.reserve(128);
+  iss.str("A long string to let getline do some more work, making sure that longer strings are parsed fast enough");
+
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(iss);
+
+    std::getline(iss, str);
+    benchmark::DoNotOptimize(str);
+    iss.seekg(0);
+  }
+}
+
+BENCHMARK(BM_getline_string);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line.pass.cpp
index 7bcb34135440bf..2dee62df14b42c 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.io/get_line.pass.cpp
@@ -13,55 +13,42 @@
 //   getline(basic_istream<charT,traits>& is,
 //           basic_string<charT,traits,Allocator>& str);
 
-#include <string>
-#include <sstream>
 #include <cassert>
+#include <sstream>
+#include <string>
 
+#include "make_string.h"
 #include "min_allocator.h"
+#include "stream_types.h"
 #include "test_macros.h"
 
-template <template <class> class Alloc>
-void test_string() {
-  using S = std::basic_string<char, std::char_traits<char>, Alloc<char> >;
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS
-  using WS = std::basic_string<wchar_t, std::char_traits<wchar_t>, Alloc<wchar_t> >;
-#endif
-  {
-    std::istringstream in(" abc\n  def\n   ghij");
-    S s("initial text");
-    std::getline(in, s);
-    assert(in.good());
-    assert(s == " abc");
-    std::getline(in, s);
-    assert(in.good());
-    assert(s == "  def");
-    std::getline(in, s);
-    assert(in.eof());
-    assert(s == "   ghij");
-  }
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+template <class CharT, class Alloc, class Stream, class Streambuf>
+void test() {
+  using string_type    = std::basic_string<CharT, std::char_traits<CharT>, Alloc>;
+  using stream_type    = std::basic_istream<CharT>;
+  using streambuf_type = Streambuf;
+
   {
-    std::wistringstream in(L" abc\n  def\n   ghij");
-    WS s(L"initial text");
+    streambuf_type sb(MAKE_CSTRING(CharT, " abc\n  def\n   ghij"));
+    stream_type in(&sb);
+    string_type s(MAKE_CSTRING(CharT, "initial text"));
     std::getline(in, s);
     assert(in.good());
-    assert(s == L" abc");
+    assert(s == MAKE_CSTRING(CharT, " abc"));
     std::getline(in, s);
     assert(in.good());
-    assert(s == L"  def");
+    assert(s == MAKE_CSTRING(CharT, "  def"));
     std::getline(in, s);
     assert(in.eof());
-    assert(s == L"   ghij");
+    assert(s == MAKE_CSTRING(CharT, "   ghij"));
   }
-#endif
-
 #ifndef TEST_HAS_NO_EXCEPTIONS
   {
-    std::basic_stringbuf<char> sb("hello");
-    std::basic_istream<char> is(&sb);
+    streambuf_type sb(MAKE_CSTRING(CharT, "hello"));
+    stream_type is(&sb);
     is.exceptions(std::ios_base::eofbit);
 
-    S s;
+    string_type s;
     bool threw = false;
     try {
       std::getline(is, s);
@@ -73,15 +60,14 @@ void test_string() {
     assert(!is.fail());
     assert(is.eof());
     assert(threw);
-    assert(s == "hello");
+    assert(s == MAKE_CSTRING(CharT, "hello"));
   }
-#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
   {
-    std::basic_stringbuf<wchar_t> sb(L"hello");
-    std::basic_istream<wchar_t> is(&sb);
-    is.exceptions(std::ios_base::eofbit);
+    streambuf_type sb(MAKE_CSTRING(CharT, ""));
+    stream_type is(&sb);
+    is.exceptions(std::ios_base::failbit);
 
-    WS s;
+    string_type s;
     bool threw = false;
     try {
       std::getline(is, s);
@@ -90,61 +76,52 @@ void test_string() {
     }
 
     assert(!is.bad());
-    assert(!is.fail());
+    assert(is.fail());
     assert(is.eof());
     assert(threw);
-    assert(s == L"hello");
+    assert(s == MAKE_CSTRING(CharT, ""));
   }
-#  endif
+#endif // TEST_HAS_NO_EXCEPTIONS
+}
 
+template <template <class> class Alloc>
+void test_alloc() {
+  test<char, Alloc<char>, std::basic_istringstream<char>, std::basic_stringbuf<char> >();
+  test<char, Alloc<char>, std::basic_istringstream<char>, non_buffering_streambuf<char> >();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, std::basic_stringbuf<wchar_t> >();
+  test<wchar_t, Alloc<wchar_t>, std::basic_istringstream<wchar_t>, non_buffering_streambuf<wchar_t> >();
+#endif
+}
+/*
+void test_tiny_allocator() {
   {
-    std::basic_stringbuf<char> sb;
-    std::basic_istream<char> is(&sb);
-    is.exceptions(std::ios_base::failbit);
-
-    S s;
-    bool threw = false;
-    try {
-      std::getline(is, s);
-    } catch (std::ios::failure const&) {
-      threw = true;
-    }
-
-    assert(!is.bad());
-    assert(is.fail());
-    assert(is.eof());
-    assert(threw);
-    assert(s == "");
+    std::string in_str =
+        "this is a too long line for the string that has to be longer because the implementation is broken\n";
+    std::istringstream iss(in_str);
+    std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char>> str;
+    std::getline(iss, str);
+    assert(str == std::string_view{in_str}.substr(0, str.max_size()));
+    assert(iss.rdstate() & std::ios::failbit);
   }
-#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
   {
-    std::basic_stringbuf<wchar_t> sb;
-    std::basic_istream<wchar_t> is(&sb);
-    is.exceptions(std::ios_base::failbit);
-
-    WS s;
-    bool threw = false;
-    try {
-      std::getline(is, s);
-    } catch (std::ios::failure const&) {
-      threw = true;
-    }
-
-    assert(!is.bad());
-    assert(is.fail());
-    assert(is.eof());
-    assert(threw);
-    assert(s == L"");
+    std::string in_str =
+        "this is a too long line for the string that has to be longer because the implementation is broken";
+    std::istringstream iss(in_str);
+    std::basic_string<char, std::char_traits<char>, tiny_size_allocator<40, char>> str;
+    std::getline(iss, str);
+    assert(str == std::string_view{in_str}.substr(0, str.max_size()));
+    assert(iss.rdstate() & std::ios::failbit);
   }
-#  endif
-#endif // TEST_HAS_NO_EXCEPTIONS
 }
+*/
 
 int main(int, char**) {
-  test_string<std::allocator>();
+  test_alloc<std::allocator>();
 #if TEST_STD_VER >= 11
-  test_string<min_allocator>();
+  test_alloc<min_allocator>();
 #endif
+  // test_tiny_allocator();
 
   return 0;
 }
diff --git a/libcxx/test/support/stream_types.h b/libcxx/test/support/stream_types.h
new file mode 100644
index 00000000000000..1aa089bd552456
--- /dev/null
+++ b/libcxx/test/support/stream_types.h
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_SUPPORT_STREAM_TYPE_H
+#define TEST_SUPPORT_STREAM_TYPE_H
+
+#include <streambuf>
+#include <string>
+#include <utility>
+
+template <class CharT>
+class non_buffering_streambuf : public std::basic_streambuf<CharT> {
+  using char_type   = CharT;
+  using traits_type = std::char_traits<CharT>;
+  using int_type    = typename traits_type::int_type;
+
+public:
+  non_buffering_streambuf(std::basic_string<char_type> underlying_data)
+      : underlying_data_(std::move(underlying_data)), index_(0) {}
+
+protected:
+  int_type underflow() override {
+    if (index_ != underlying_data_.size())
+      return underlying_data_[index_];
+    return traits_type::eof();
+  }
+
+  int_type uflow() override {
+    if (index_ != underlying_data_.size())
+      return underlying_data_[index_++];
+    return traits_type::eof();
+  }
+
+private:
+  std::basic_string<char_type> underlying_data_;
+  size_t index_;
+};
+
+#endif // TEST_SUPPORT_STREAM_TYPE_H



More information about the libcxx-commits mailing list