[libcxx-commits] [libcxx] 056153f - Optimize vector::assign for InputIterator-only pair inputs (#113852)
via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Nov 28 11:53:03 PST 2024
Author: Peng Liu
Date: 2024-11-28T20:52:59+01:00
New Revision: 056153f36eca184f81969f5cd5c8cd967c935f96
URL: https://github.com/llvm/llvm-project/commit/056153f36eca184f81969f5cd5c8cd967c935f96
DIFF: https://github.com/llvm/llvm-project/commit/056153f36eca184f81969f5cd5c8cd967c935f96.diff
LOG: Optimize vector::assign for InputIterator-only pair inputs (#113852)
This PR optimizes the input iterator overload of `assign(_InputIterator,
_InputIterator)` in `std::vector<_Tp, _Allocator>` by directly assigning
to already initialized memory, rather than first destroying existing
elements and then constructing new ones. By eliminating unnecessary
destruction and construction, the proposed algorithm enhances the
performance by up to 2x for trivial element types (e.g.,
`std::vector<int>`), up to 2.6x for non-trivial element types like
`std::vector<std::string>`, and up to 3.4x for more complex non-trivial
types (e.g., `std::vector<std::vector<int>>`).
### Google Benchmarks
Benchmark tests (`libcxx/test/benchmarks/vector_operations.bench.cpp`)
were conducted for the `assign()` implementations before and after this
patch. The tests focused on trivial element types like
`std::vector<int>`, and non-trivial element types such as
`std::vector<std::string>` and `std::vector<std::vector<int>>`.
#### Before
```
-------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------------------
BM_AssignInputIterIter/vector_int/1024/1024 1157 ns 1169 ns 608188
BM_AssignInputIterIter<32>/vector_string/1024/1024 14559 ns 14710 ns 47277
BM_AssignInputIterIter<32>/vector_vector_int/1024/1024 26846 ns 27129 ns 25925
```
#### After
```
-------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------------------
BM_AssignInputIterIter/vector_int/1024/1024 561 ns 566 ns 1242251
BM_AssignInputIterIter<32>/vector_string/1024/1024 5604 ns 5664 ns 128365
BM_AssignInputIterIter<32>/vector_vector_int/1024/1024 7927 ns 8012 ns 88579
```
Added:
Modified:
libcxx/docs/ReleaseNotes/20.rst
libcxx/include/__vector/vector.h
libcxx/test/benchmarks/ContainerBenchmarks.h
libcxx/test/benchmarks/GenerateInput.h
libcxx/test/benchmarks/vector_operations.bench.cpp
Removed:
################################################################################
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index 93d45f508d5ffe..a72fd2d5db9e26 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -69,6 +69,10 @@ Improvements and New Features
- The ``_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY`` ABI configuration was added, which allows storing valid bounds
in ``std::array::iterator`` and detecting OOB accesses when the appropriate hardening mode is enabled.
+- The input iterator overload of `assign(_InputIterator, _InputIterator)` in `std::vector<_Tp, _Allocator>` has been
+ optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
+ and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
+
Deprecations and Removals
-------------------------
diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 4b9ba24e97f65e..6ba7ba7bcf724b 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -1003,9 +1003,15 @@ template <class _Tp, class _Allocator>
template <class _Iterator, class _Sentinel>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
vector<_Tp, _Allocator>::__assign_with_sentinel(_Iterator __first, _Sentinel __last) {
- clear();
- for (; __first != __last; ++__first)
- emplace_back(*__first);
+ pointer __cur = __begin_;
+ for (; __first != __last && __cur != __end_; ++__first, (void)++__cur)
+ *__cur = *__first;
+ if (__cur != __end_) {
+ __destruct_at_end(__cur);
+ } else {
+ for (; __first != __last; ++__first)
+ emplace_back(*__first);
+ }
}
template <class _Tp, class _Allocator>
diff --git a/libcxx/test/benchmarks/ContainerBenchmarks.h b/libcxx/test/benchmarks/ContainerBenchmarks.h
index 38e11777f488b7..458134c8191d12 100644
--- a/libcxx/test/benchmarks/ContainerBenchmarks.h
+++ b/libcxx/test/benchmarks/ContainerBenchmarks.h
@@ -14,8 +14,9 @@
#include <iterator>
#include <utility>
-#include "Utilities.h"
#include "benchmark/benchmark.h"
+#include "Utilities.h"
+#include "test_iterators.h"
namespace ContainerBenchmarks {
@@ -50,6 +51,19 @@ void BM_Assignment(benchmark::State& st, Container) {
}
}
+template <std::size_t... sz, typename Container, typename GenInputs>
+void BM_AssignInputIterIter(benchmark::State& st, Container c, GenInputs gen) {
+ auto v = gen(1, sz...);
+ c.resize(st.range(0), v[0]);
+ auto in = gen(st.range(1), sz...);
+ benchmark::DoNotOptimize(&in);
+ benchmark::DoNotOptimize(&c);
+ for (auto _ : st) {
+ c.assign(cpp17_input_iterator(in.begin()), cpp17_input_iterator(in.end()));
+ benchmark::ClobberMemory();
+ }
+}
+
template <class Container>
void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) {
const auto size = st.range(0);
diff --git a/libcxx/test/benchmarks/GenerateInput.h b/libcxx/test/benchmarks/GenerateInput.h
index db376614382bc4..67ce798071eaa4 100644
--- a/libcxx/test/benchmarks/GenerateInput.h
+++ b/libcxx/test/benchmarks/GenerateInput.h
@@ -108,16 +108,29 @@ std::vector<IntT> getRandomIntegerInputs(std::size_t N) {
return inputs;
}
+inline std::vector<std::string> getRandomStringInputsWithLength(std::size_t N, std::size_t len) { // N-by-len
+ std::vector<std::string> inputs;
+ inputs.reserve(N);
+ for (std::size_t i = 0; i < N; ++i)
+ inputs.push_back(getRandomString(len));
+ return inputs;
+}
+
inline std::vector<std::string> getDuplicateStringInputs(std::size_t N) {
std::vector<std::string> inputs(N, getRandomString(1024));
return inputs;
}
inline std::vector<std::string> getRandomStringInputs(std::size_t N) {
- std::vector<std::string> inputs;
+ return getRandomStringInputsWithLength(N, 1024);
+}
+
+template <class IntT>
+std::vector<std::vector<IntT>> getRandomIntegerInputsWithLength(std::size_t N, std::size_t len) { // N-by-len
+ std::vector<std::vector<IntT>> inputs;
inputs.reserve(N);
for (std::size_t i = 0; i < N; ++i)
- inputs.push_back(getRandomString(1024));
+ inputs.push_back(getRandomIntegerInputs<IntT>(len));
return inputs;
}
diff --git a/libcxx/test/benchmarks/vector_operations.bench.cpp b/libcxx/test/benchmarks/vector_operations.bench.cpp
index 1855861263324d..3a72eaec4dd57e 100644
--- a/libcxx/test/benchmarks/vector_operations.bench.cpp
+++ b/libcxx/test/benchmarks/vector_operations.bench.cpp
@@ -18,7 +18,6 @@
#include <vector>
#include "benchmark/benchmark.h"
-
#include "ContainerBenchmarks.h"
#include "GenerateInput.h"
@@ -79,4 +78,17 @@ BENCHMARK(bm_grow<std::string>);
BENCHMARK(bm_grow<std::unique_ptr<int>>);
BENCHMARK(bm_grow<std::deque<int>>);
+BENCHMARK_CAPTURE(BM_AssignInputIterIter, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
+ ->Args({TestNumInputs, TestNumInputs});
+
+BENCHMARK_CAPTURE(
+ BM_AssignInputIterIter<32>, vector_string, std::vector<std::string>{}, getRandomStringInputsWithLength)
+ ->Args({TestNumInputs, TestNumInputs});
+
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<100>,
+ vector_vector_int,
+ std::vector<std::vector<int>>{},
+ getRandomIntegerInputsWithLength<int>)
+ ->Args({TestNumInputs, TestNumInputs});
+
BENCHMARK_MAIN();
More information about the libcxx-commits
mailing list