[libcxx-commits] [libcxx] [libc++] Optimize std::getline (PR #121346)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Tue Dec 31 01:39:34 PST 2024
https://github.com/philnik777 updated https://github.com/llvm/llvm-project/pull/121346
From 60ab33de0d098f74eda7d04e285cb288a7bd3d21 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Mon, 30 Dec 2024 16:57:54 +0100
Subject: [PATCH] [libc++] Optimize std::getline
```
-----------------------------------------------
Benchmark                    old            new
-----------------------------------------------
BM_getline_string         318 ns        32.4 ns
```
---
libcxx/docs/ReleaseNotes/20.rst | 2 +
libcxx/include/istream | 60 +++++++++++--------
libcxx/include/streambuf | 7 +++
.../test/benchmarks/streams/getline.bench.cpp | 35 +++++++++++
4 files changed, 78 insertions(+), 26 deletions(-)
create mode 100644 libcxx/test/benchmarks/streams/getline.bench.cpp
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index c8a07fb8b73348..05881722672dda 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -73,6 +73,8 @@ Improvements and New Features
optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
+- ``std::getline`` has been optimized, resulting in a performance improvement of up to 10x.
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/istream b/libcxx/include/istream
index 4b177c41cc325e..76ddab04bbdcdd 100644
--- a/libcxx/include/istream
+++ b/libcxx/include/istream
@@ -1265,41 +1265,49 @@ _LIBCPP_HIDE_FROM_ABI basic_istream<_CharT, _Traits>&
getline(basic_istream<_CharT, _Traits>& __is, basic_string<_CharT, _Traits, _Allocator>& __str, _CharT __dlm) {
ios_base::iostate __state = ios_base::goodbit;
typename basic_istream<_CharT, _Traits>::sentry __sen(__is, true);
- if (__sen) {
+ if (!__sen) // sentry failed: return before __str is cleared or anything is read
+ return __is;
# if _LIBCPP_HAS_EXCEPTIONS
- try {
+ try {
# endif
- __str.clear();
- streamsize __extr = 0;
- while (true) {
- typename _Traits::int_type __i = __is.rdbuf()->sbumpc();
- if (_Traits::eq_int_type(__i, _Traits::eof())) {
- __state |= ios_base::eofbit;
- break;
- }
- ++__extr;
- _CharT __ch = _Traits::to_char_type(__i);
- if (_Traits::eq(__ch, __dlm))
- break;
- __str.push_back(__ch);
- if (__str.size() == __str.max_size()) {
+ __str.clear(); // start from an empty string; input is appended chunk by chunk below
+
+ auto& __buffer = *__is.rdbuf();
+
+ auto __next = __buffer.sgetc(); // sgetc() only peeks; characters are consumed via __gbump_ptrdiff below
+ for (; !_Traits::eq_int_type(__next, _Traits::eof()); __next = __buffer.sgetc()) { // one pass per get-area chunk, until EOF
+ const auto* __first = __buffer.gptr();
+ const auto* __last = __buffer.egptr();
+ const auto* __match = _Traits::find(__first, __last - __first, __dlm); // search the whole range [gptr, egptr) for the delimiter at once
+ if (__match) {
+ if (auto __cap = __str.max_size() - __str.size(); __cap <= static_cast<size_t>(__match - __first)) { // text before the delimiter would fill __str to max_size()
+ __str.append(__first, __cap); // store only what still fits; the delimiter stays unread
+ __buffer.__gbump_ptrdiff(__cap);
__state |= ios_base::failbit;
break;
}
+ __str.append(__first, __match); // copy everything before the delimiter
+ __buffer.__gbump_ptrdiff(__match - __first + 1); // the +1 consumes the delimiter itself, which is not stored
+ break;
}
- if (__extr == 0)
- __state |= ios_base::failbit;
+
+ __str.append(__first, __last); // no delimiter in this chunk: take all of it. NOTE(review): unlike the branch above, this append is not checked against max_size() - confirm
+ __buffer.__gbump_ptrdiff(__last - __first); // NOTE(review): if gptr() == egptr() while sgetc() still yields a character, nothing is consumed here - confirm this cannot spin
+ }
+
+ if (_Traits::eq_int_type(__next, _Traits::eof())) // the loop ended because sgetc() reported EOF, not via a break
+ __state |= ios_base::eofbit | (__str.empty() ? ios_base::failbit : ios_base::goodbit); // eofbit always; failbit too when nothing was stored
+
# if _LIBCPP_HAS_EXCEPTIONS
- } catch (...) {
- __state |= ios_base::badbit;
- __is.__setstate_nothrow(__state);
- if (__is.exceptions() & ios_base::badbit) {
- throw;
- }
+ } catch (...) { // any exception thrown inside the try block is recorded as badbit
+ __state |= ios_base::badbit;
+ __is.__setstate_nothrow(__state);
+ if (__is.exceptions() & ios_base::badbit) {
+ throw; // rethrow only when the stream's exception mask includes badbit
}
-# endif
- __is.setstate(__state);
}
+# endif
+ __is.setstate(__state);
return __is;
}
diff --git a/libcxx/include/streambuf b/libcxx/include/streambuf
index 7f02a9b3314110..a3e1cf489d0efa 100644
--- a/libcxx/include/streambuf
+++ b/libcxx/include/streambuf
@@ -241,6 +241,9 @@ protected:
inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void gbump(int __n) { __ninp_ += __n; }
+ // gbump takes an int, so it might not be able to represent the offset we want to add; this variant applies a ptrdiff_t offset to __ninp_ instead.
+ _LIBCPP_HIDE_FROM_ABI void __gbump_ptrdiff(ptrdiff_t __n) { __ninp_ += __n; }
+
inline _LIBCPP_HIDE_FROM_ABI_AFTER_V1 void setg(char_type* __gbeg, char_type* __gnext, char_type* __gend) {
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gnext), "[gbeg, gnext) must be a valid range");
_LIBCPP_ASSERT_VALID_INPUT_RANGE(std::__is_valid_range(__gbeg, __gend), "[gbeg, gend) must be a valid range");
@@ -297,6 +300,10 @@ private:
char_type* __bout_;
char_type* __nout_;
char_type* __eout_;
+
+ template <class _CharT2, class _Traits2, class _Allocator> // friend: lets getline call the protected gptr()/egptr()/__gbump_ptrdiff() on the buffer directly
+ _LIBCPP_HIDE_FROM_ABI friend basic_istream<_CharT2, _Traits2>&
+ getline(basic_istream<_CharT2, _Traits2>&, basic_string<_CharT2, _Traits2, _Allocator>&, _CharT2);
};
template <class _CharT, class _Traits>
diff --git a/libcxx/test/benchmarks/streams/getline.bench.cpp b/libcxx/test/benchmarks/streams/getline.bench.cpp
new file mode 100644
index 00000000000000..6a2215fe061167
--- /dev/null
+++ b/libcxx/test/benchmarks/streams/getline.bench.cpp
@@ -0,0 +1,35 @@
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+#include <istream>
+#include <sstream>
+
+#include <benchmark/benchmark.h>
+
+void BM_getline_string(benchmark::State& state) { // benchmarks std::getline(istream&, string&) on one line held in an istringstream
+ std::istringstream iss;
+
+ std::string str;
+ str.reserve(128); // reserve up front; presumably so the timed loop does not measure str growing - confirm
+ iss.str("A long string to let getline do some more work, making sure that longer strings are parsed fast enough"); // no '\n' in the text, so each getline call reads to the end of the buffer
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(iss);
+
+ std::getline(iss, str);
+ benchmark::DoNotOptimize(str); // keep the result observable so the getline call is not optimized away
+ iss.seekg(0); // rewind so the next iteration reads the same text again; note this call is inside the timed loop
+ }
+}
+
+BENCHMARK(BM_getline_string);
+
+BENCHMARK_MAIN();
More information about the libcxx-commits
mailing list