[libcxx-commits] [libcxx] [libc++] Speed up vector<bool> copy/move-ctors [1/3] (PR #120132)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Dec 16 12:03:36 PST 2024
https://github.com/winner245 created https://github.com/llvm/llvm-project/pull/120132
### General description
This PR is part of a series aimed at significantly improving the performance of `vector<bool>`. Each PR focuses on a specific subset of operations, so that it is self-contained and easy to review. The main idea behind the improvements is to operate on whole storage words using bit-manipulation techniques, rather than processing one bit at a time as the previous implementation did, which yields substantial performance gains.
### Current PR
This PR significantly improves the performance of several `vector<bool>` constructors, by at least **500x** under a variety of practical storage word sizes. The improvements measured by the benchmarks below are:
- Copy constructor: **576x**
- Copy constructor with extended allocator: **546x**
- Move constructor with extended allocator: **568x**
#### Before:
```
-------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------
BM_CopyConstruct/vector_bool/5140480 8596063 ns 8563419 ns 78
BM_MoveConstruct/vector_bool/5140480 0.474 ns 0.473 ns 1470604919
BM_CopyConstruct_Alloc/vector_bool/5140480 8182327 ns 8169876 ns 83
BM_MoveConstruct_Alloc/vector_bool/5140480 8544951 ns 8537676 ns 83
```
#### After:
```
-------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------
BM_CopyConstruct/vector_bool/5140480 14899 ns 14943 ns 46010
BM_MoveConstruct/vector_bool/5140480 0.451 ns 0.453 ns 1571837345
BM_CopyConstruct_Alloc/vector_bool/5140480 14979 ns 15033 ns 47913
BM_MoveConstruct_Alloc/vector_bool/5140480 15043 ns 15101 ns 45906
```
>From 05bd92720ddacedc7a5c79c844501e56e32ada56 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Mon, 16 Dec 2024 13:11:30 -0500
Subject: [PATCH] Speed-up copy/move-ctors for vector<bool>
---
libcxx/include/__vector/vector_bool.h | 28 +++++++++------
.../containers/ContainerBenchmarks.h | 30 ++++++++++++++++
.../vector_bool_operations.bench.cpp | 36 +++++++++++++++++++
3 files changed, 83 insertions(+), 11 deletions(-)
create mode 100644 libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 36eb7f350ac406..d7f9ba78a7af80 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -398,6 +398,8 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
__guard.__complete();
}
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __alloc_and_copy(const vector& __v);
+
template <class _Iterator, class _Sentinel>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __assign_with_sentinel(_Iterator __first, _Sentinel __last);
@@ -674,25 +676,30 @@ vector<bool, _Allocator>::vector(initializer_list<value_type> __il, const alloca
#endif // _LIBCPP_CXX03_LANG
+// This function copies each storage word as a whole, which is substantially more efficient than copying
+// individual bits within each word
+template <class _Allocator>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void vector<bool, _Allocator>::__alloc_and_copy(const vector& __v) {
+ if (__v.__size_) {
+ __vallocate(__v.__size_);
+ std::copy(__v.__begin_, __v.__begin_ + __external_cap_to_internal(__v.__size_), __begin_);
+ }
+ __size_ = __v.__size_;
+}
+
template <class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(const vector& __v)
: __begin_(nullptr),
__size_(0),
__cap_(0),
__alloc_(__storage_traits::select_on_container_copy_construction(__v.__alloc_)) {
- if (__v.size() > 0) {
- __vallocate(__v.size());
- __construct_at_end(__v.begin(), __v.end(), __v.size());
- }
+ __alloc_and_copy(__v);
}
template <class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(const vector& __v, const allocator_type& __a)
: __begin_(nullptr), __size_(0), __cap_(0), __alloc_(__a) {
- if (__v.size() > 0) {
- __vallocate(__v.size());
- __construct_at_end(__v.begin(), __v.end(), __v.size());
- }
+ __alloc_and_copy(__v);
}
template <class _Allocator>
@@ -737,9 +744,8 @@ vector<bool, _Allocator>::vector(vector&& __v, const __type_identity_t<allocator
this->__cap_ = __v.__cap_;
__v.__begin_ = nullptr;
__v.__cap_ = __v.__size_ = 0;
- } else if (__v.size() > 0) {
- __vallocate(__v.size());
- __construct_at_end(__v.begin(), __v.end(), __v.size());
+ } else {
+ __alloc_and_copy(__v);
}
}
diff --git a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
index 6d21e12896ec9e..2e83fc68ef4d22 100644
--- a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
+++ b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
@@ -39,6 +39,36 @@ void BM_CopyConstruct(benchmark::State& st, Container) {
}
}
+template <class Container>
+void BM_MoveConstruct(benchmark::State& st, Container) {
+ auto size = st.range(0);
+ Container c(size);
+ for (auto _ : st) {
+ auto v = std::move(c);
+ DoNotOptimizeData(v);
+ }
+}
+
+template <class Container, class Allocator>
+void BM_CopyConstruct_Alloc(benchmark::State& st, Container, Allocator a) {
+ auto size = st.range(0);
+ Container c(size);
+ for (auto _ : st) {
+ Container v(c, a);
+ DoNotOptimizeData(v);
+ }
+}
+
+template <class Container, class Allocator>
+void BM_MoveConstruct_Alloc(benchmark::State& st, Container, Allocator a) {
+ auto size = st.range(0);
+ Container c(size);
+ for (auto _ : st) {
+ Container v(std::move(c), a);
+ DoNotOptimizeData(v);
+ }
+}
+
template <class Container>
void BM_Assignment(benchmark::State& st, Container) {
auto size = st.range(0);
diff --git a/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
new file mode 100644
index 00000000000000..7de6b344d43943
--- /dev/null
+++ b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "ContainerBenchmarks.h"
+#include "../GenerateInput.h"
+#include "test_allocator.h"
+
+using namespace ContainerBenchmarks;
+
+BENCHMARK_CAPTURE(BM_CopyConstruct, vector_bool, std::vector<bool>{})->Arg(5140480);
+BENCHMARK_CAPTURE(BM_MoveConstruct, vector_bool, std::vector<bool>{})->Arg(5140480);
+BENCHMARK_CAPTURE(
+ BM_CopyConstruct_Alloc, vector_bool, std::vector<bool, test_allocator<bool>>(), test_allocator<bool>(3))
+ ->Arg(5140480);
+BENCHMARK_CAPTURE(
+ BM_MoveConstruct_Alloc, vector_bool, std::vector<bool, test_allocator<bool>>(), test_allocator<bool>(3))
+ ->Arg(5140480);
+
+BENCHMARK_MAIN();
\ No newline at end of file
More information about the libcxx-commits
mailing list