[libcxx-commits] [libcxx] PR: [libc++] Speed-up input_range based operations in vector<bool> (PR #124188)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Jan 23 12:54:15 PST 2025
https://github.com/winner245 created https://github.com/llvm/llvm-project/pull/124188
As a follow-up to #120134 (which improved the performance of `forward_range` based operations in `vector<bool>`), this PR enhances the performance of `input_iterator`-pair or `input_range` based constructors and assignment functions. Instead of processing individual bits as in the previous implementation, the new implementation first collects bits into full storage words and then processes entire words. This leads to performance improvements of up to 1.9x for the input_{iterator, range} constructors and up to 5.5x for the input_{iterator, range} assignment functions.
- input_iterator-pair ctors `vector( InputIt first, InputIt last, const Allocator& alloc)`: **1.9x**
- input_range-ctors `vector(std::from_range_t, R&& rg, const Allocator& alloc)`: **1.9x**
- input_iterator-pair assign function `assign(InputIt first, InputIt last)`: **5.1x**
- input_range assign function `assign_range(R&& rg)`: **5.5x**
#### Before:
```
--------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------------------------------------------
BM_ConstructInputIterIter<std::vector<bool>>/vb/514048 4726350 ns 4433882 ns 160
BM_ConstructFromInputRange<std::vector<bool>>/vb/514048 4824221 ns 4453109 ns 161
BM_AssignInputIterIter<std::vector<bool>>/vb/514048 2509647 ns 2316606 ns 302
BM_AssignInputRange<std::vector<bool>>/vb/514048 2604604 ns 2445725 ns 257
```
#### After:
```
--------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------------------------------------------
BM_ConstructInputIterIter<std::vector<bool>>/vb/514048 2533024 ns 2522681 ns 281
BM_ConstructFromInputRange<std::vector<bool>>/vb/514048 2551838 ns 2551748 ns 269
BM_AssignInputIterIter<std::vector<bool>>/vb/514048 490259 ns 485889 ns 1177
BM_AssignInputRange<std::vector<bool>>/vb/514048 475265 ns 469883 ns 1514
```
>From 522b0a91a441d8c41b241cbbfe8ad07975a3f9fd Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Wed, 22 Jan 2025 21:08:19 -0500
Subject: [PATCH] Speed-up input_iterator-pair ctor and assign functions
---
libcxx/include/__vector/vector_bool.h | 39 +++++++++--
.../containers/ContainerBenchmarks.h | 70 ++++++++++++++++---
.../vector_bool_operations.bench.cpp | 32 +++++++++
.../containers/vector_operations.bench.cpp | 15 ++--
.../test/std/containers/from_range_helpers.h | 18 +++++
5 files changed, 150 insertions(+), 24 deletions(-)
create mode 100644 libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 4f1c442ce0be8d..c04498626abe6c 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -411,8 +411,7 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
__init_with_sentinel(_InputIterator __first, _Sentinel __last) {
auto __guard = std::__make_exception_guard(__destroy_vector(*this));
- for (; __first != __last; ++__first)
- push_back(*__first);
+ __push_back_words_with_sentinel(std::move(__first), std::move(__last));
__guard.__complete();
}
@@ -509,6 +508,10 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __move_assign_alloc(vector&, false_type) _NOEXCEPT {}
+ template <class _InputIterator, class _Sentinel>
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+ __push_back_words_with_sentinel(_InputIterator __first, _Sentinel __last);
+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_t __hash_code() const _NOEXCEPT;
friend class __bit_reference<vector>;
@@ -820,8 +823,7 @@ template <class _Iterator, class _Sentinel>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
vector<bool, _Allocator>::__assign_with_sentinel(_Iterator __first, _Sentinel __last) {
clear();
- for (; __first != __last; ++__first)
- push_back(*__first);
+ __push_back_words_with_sentinel(std::move(__first), std::move(__last));
}
template <class _Allocator>
@@ -1084,6 +1086,35 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<bool, _Allocator>::flip() _NOEXCEPT {
*__p = ~*__p;
}
+// Appends the bits of the range [__first, __last) to the vector one storage word at a time.
+// Precondition: the size of the vector is a multiple of `__bits_per_word`, so the appended
+// bits start at a word boundary. This is an internal invariant of the callers, hence the
+// check below uses _LIBCPP_ASSERT_INTERNAL rather than a user-input assertion category.
+//
+// Each pass of the outer loop packs up to `__bits_per_word` input bits into a local word,
+// grows the storage if it is full, and stores the word; unused high bits remain zero.
+template <class _Allocator>
+template <class _InputIterator, class _Sentinel>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+vector<bool, _Allocator>::__push_back_words_with_sentinel(_InputIterator __first, _Sentinel __last) {
+  _LIBCPP_ASSERT_INTERNAL(
+      this->__size_ % __bits_per_word == 0,
+      "vector<bool>::__push_back_words_with_sentinel called with a size that is not a multiple of __bits_per_word");
+  size_type __n_words = this->__size_ / __bits_per_word; // index of the next storage word to fill
+  while (__first != __last) {
+    __storage_type __w = 0;
+    unsigned __n_bits = 0; // number of bits gathered into __w so far
+    for (; __n_bits != __bits_per_word && __first != __last; ++__n_bits, (void)++__first) {
+      if (*__first)
+        __w |= static_cast<__storage_type>(static_cast<__storage_type>(1) << __n_bits);
+    }
+    if (this->__size_ == this->capacity()) // no room for another word: grow first
+      reserve(__recommend(this->__size_ + 1));
+    this->__begin_[__n_words++] = __w;
+    this->__size_ += __n_bits;
+  }
+}
+
template <class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 bool vector<bool, _Allocator>::__invariants() const {
if (this->__begin_ == nullptr) {
diff --git a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
index 5fc8981619672c..a1b9e6585d9164 100644
--- a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
+++ b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
@@ -15,6 +15,7 @@
#include <utility>
#include "benchmark/benchmark.h"
+#include "../../std/containers/from_range_helpers.h"
#include "../Utilities.h"
#include "test_iterators.h"
@@ -51,16 +52,43 @@ void BM_Assignment(benchmark::State& st, Container) {
}
}
-template <std::size_t... sz, typename Container, typename GenInputs>
-void BM_AssignInputIterIter(benchmark::State& st, Container c, GenInputs gen) {
- auto v = gen(1, sz...);
- c.resize(st.range(0), v[0]);
- auto in = gen(st.range(1), sz...);
- benchmark::DoNotOptimize(&in);
- benchmark::DoNotOptimize(&c);
+template <typename Container, class Generator>
+void BM_AssignInputIterIter(benchmark::State& st, Generator gen) {
+  using value_type = typename Container::value_type;
+  auto count = st.range(0);
+  auto input_a = gen(count);
+  auto input_b = gen(count);
+  DoNotOptimizeData(input_a);
+  DoNotOptimizeData(input_b);
+  Container c(input_a.begin(), input_a.end());
+  bool use_a = false; // the first timed iteration assigns from input_b
for (auto _ : st) {
- c.assign(cpp17_input_iterator(in.begin()), cpp17_input_iterator(in.end()));
- benchmark::ClobberMemory();
+    std::vector<value_type>& src = use_a ? input_a : input_b;
+    auto src_begin = src.begin();
+    auto src_end = src.end();
+    c.assign(cpp17_input_iterator(src_begin), cpp17_input_iterator(src_end));
+    use_a = !use_a; // alternate the source so consecutive assignments differ
+    DoNotOptimizeData(c);
+  }
+}
+
+template <typename Container, class Generator>
+void BM_AssignInputRange(benchmark::State& st, Generator gen) {
+  // Measures Container::assign_range() on generated data wrapped in input_only_range.
+  auto size = st.range(0);
+  auto in1 = gen(size);
+  auto in2 = gen(size);
+  DoNotOptimizeData(in1);
+  DoNotOptimizeData(in2);
+  input_only_range rg1(std::ranges::begin(in1), std::ranges::end(in1));
+  input_only_range rg2(std::ranges::begin(in2), std::ranges::end(in2));
+  Container c(std::from_range, rg1);
+  bool toggle = false; // alternate between the two inputs on successive iterations
+  for (auto _ : st) {
+    auto& rg = toggle ? rg1 : rg2;
+    c.assign_range(rg);
+    toggle = !toggle;
+    DoNotOptimizeData(c);
}
}
@@ -85,6 +113,18 @@ void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
}
}
+template <class Container, class GenInputs>
+void BM_ConstructInputIterIter(benchmark::State& st, GenInputs gen) {
+  auto in = gen(st.range(0)); // gen is called with the benchmark's first argument
+  const auto beg = cpp17_input_iterator(in.begin());
+  const auto end = cpp17_input_iterator(in.end());
+  benchmark::DoNotOptimize(&in);
+  for (auto _ : st) { // ranged-for state loop, matching the BM_Assign* benchmarks in this header
+    Container c(beg, end);
+    DoNotOptimizeData(c);
+  }
+}
+
template <class Container, class GenInputs>
void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
auto in = gen(st.range(0));
@@ -95,6 +135,18 @@ void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
}
}
+template <class Container, class GenInputs>
+void BM_ConstructFromInputRange(benchmark::State& st, GenInputs gen) {
+  // Measures Container(std::from_range, rg), where rg is the generated data wrapped in input_only_range.
+  auto in = gen(st.range(0));
+  input_only_range rg(std::ranges::begin(in), std::ranges::end(in));
+  benchmark::DoNotOptimize(&in);
+  for (auto _ : st) {
+    Container c(std::from_range, rg);
+    DoNotOptimizeData(c);
+  }
+}
+
template <class Container>
void BM_Pushback_no_grow(benchmark::State& state, Container c) {
int count = state.range(0);
diff --git a/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
new file mode 100644
index 00000000000000..b619fbea7b2b93
--- /dev/null
+++ b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "ContainerBenchmarks.h"
+#include "../GenerateInput.h"
+
+using namespace ContainerBenchmarks;
+
+BENCHMARK_CAPTURE(BM_ConstructInputIterIter<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+BENCHMARK_CAPTURE(BM_ConstructFromInputRange<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+BENCHMARK_CAPTURE(BM_AssignInputRange<std::vector<bool>>, vb, getRandomIntegerInputs<bool>)->Arg(514048);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/benchmarks/containers/vector_operations.bench.cpp b/libcxx/test/benchmarks/containers/vector_operations.bench.cpp
index 1cd754ca7e7803..3261cf3afab8bb 100644
--- a/libcxx/test/benchmarks/containers/vector_operations.bench.cpp
+++ b/libcxx/test/benchmarks/containers/vector_operations.bench.cpp
@@ -78,18 +78,11 @@ BENCHMARK(bm_grow<std::string>);
BENCHMARK(bm_grow<std::unique_ptr<int>>);
BENCHMARK(bm_grow<std::deque<int>>);
-BENCHMARK_CAPTURE(BM_AssignInputIterIter, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
- ->Args({TestNumInputs, TestNumInputs});
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<std::vector<int>>, vector_int, getRandomIntegerInputs<int>)
+ ->Arg(TestNumInputs);
-BENCHMARK_CAPTURE(
- BM_AssignInputIterIter<32>, vector_string, std::vector<std::string>{}, getRandomStringInputsWithLength)
- ->Args({TestNumInputs, TestNumInputs});
-
-BENCHMARK_CAPTURE(BM_AssignInputIterIter<100>,
- vector_vector_int,
- std::vector<std::vector<int>>{},
- getRandomIntegerInputsWithLength<int>)
- ->Args({TestNumInputs, TestNumInputs});
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<std::vector<std::string>>, vector_string, getRandomStringInputs)
+ ->Arg(TestNumInputs);
BENCHMARK_CAPTURE(BM_Insert_InputIterIter_NoRealloc, vector_int, std::vector<int>(100, 1), getRandomIntegerInputs<int>)
->Arg(514048);
diff --git a/libcxx/test/std/containers/from_range_helpers.h b/libcxx/test/std/containers/from_range_helpers.h
index e17ea247618bc2..f98fddcf29d525 100644
--- a/libcxx/test/std/containers/from_range_helpers.h
+++ b/libcxx/test/std/containers/from_range_helpers.h
@@ -50,6 +50,24 @@ constexpr auto wrap_input(std::vector<T>& input) {
return std::ranges::subrange(std::move(b), std::move(e));
}
+template <class It>
+class input_only_range { // exposes [begin, end) through cpp20_input_iterator / sentinel_wrapper
+public:
+  using Iter = cpp20_input_iterator<It>;
+  using Sent = sentinel_wrapper<Iter>;
+
+  input_only_range(It begin, It end) : begin_(std::move(begin)), end_(std::move(end)) {}
+  Iter begin() { return Iter(begin_); } // copy, not move: the same object may be traversed again
+  Sent end() { return Sent(Iter(end_)); }
+
+private:
+  It begin_;
+  It end_;
+};
+
+template <class It>
+input_only_range(It, It) -> input_only_range<It>;
+
struct KeyValue {
int key; // Only the key is considered for equality comparison.
char value; // Allows distinguishing equivalent instances.
More information about the libcxx-commits
mailing list