[libcxx-commits] [libcxx] [libc++] Refactor the sequence container benchmarks (PR #119763)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Thu Jan 16 11:32:42 PST 2025
================
@@ -0,0 +1,510 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_BENCHMARKS_CONTAINERS_CONTAINER_BENCHMARKS_H
+#define TEST_BENCHMARKS_CONTAINERS_CONTAINER_BENCHMARKS_H
+
+#include <cstddef>
+#include <iterator>
+#include <ranges> // for std::from_range
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+
+namespace ContainerBenchmarks {
+
+// Keeps the optimizer from discarding the contents of `c`.
+//
+// When the container provides a `data()` member, the pointer it returns is
+// handed to benchmark::DoNotOptimize; otherwise the address of the container
+// object itself is used.
+template <class Container>
+void DoNotOptimizeData(Container& c) {
+  if constexpr (!(requires { c.data(); })) {
+    // No data() member: fall back to the container's own address.
+    benchmark::DoNotOptimize(&c);
+  } else {
+    benchmark::DoNotOptimize(c.data());
+  }
+}
+
+//
+// Sequence container operations
+//
+// Benchmarks `Container(size)`, where `size` is `st.range(0)`.
+//
+// The container is constructed into raw storage so that only the constructor
+// is timed: the matching destructor runs while the timer is paused.
+template <class Container>
+void BM_ctor_size(benchmark::State& st) {
+  const auto size = st.range(0);
+  // A plain char array is only guaranteed an alignment of 1; the storage must
+  // be aligned for Container before an object can be constructed in it.
+  alignas(Container) char buffer[sizeof(Container)];
+  for (auto _ : st) {
+    Container* c = std::construct_at(reinterpret_cast<Container*>(buffer), size);
+    benchmark::DoNotOptimize(buffer);
+    st.PauseTiming();
+    std::destroy_at(c); // destroy through the pointer to the live object, outside the timed region
+    st.ResumeTiming();
+  }
+}
+
+// Benchmarks `Container(size, value)`, where `size` is `st.range(0)` and
+// `value` is a value-initialized `Container::value_type`.
+//
+// The container is constructed into raw storage so that only the constructor
+// is timed: the matching destructor runs while the timer is paused.
+template <class Container>
+void BM_ctor_size_value(benchmark::State& st) {
+  using ValueType = typename Container::value_type;
+  const auto size = st.range(0);
+  ValueType value{};
+  benchmark::DoNotOptimize(value);
+  // A plain char array is only guaranteed an alignment of 1; the storage must
+  // be aligned for Container before an object can be constructed in it.
+  alignas(Container) char buffer[sizeof(Container)];
+  for (auto _ : st) {
+    Container* c = std::construct_at(reinterpret_cast<Container*>(buffer), size, value);
+    benchmark::DoNotOptimize(buffer);
+    st.PauseTiming();
+    std::destroy_at(c); // destroy through the pointer to the live object, outside the timed region
+    st.ResumeTiming();
+  }
+}
+
+// Benchmarks `Container(first, last)` using the iterators of a std::vector
+// holding `st.range(0)` elements as the source range.
+//
+// The container is constructed into raw storage so that only the constructor
+// is timed: the matching destructor runs while the timer is paused.
+template <class Container>
+void BM_ctor_iter_iter(benchmark::State& st) {
+  using ValueType = typename Container::value_type;
+  const auto size = st.range(0);
+  std::vector<ValueType> in(size);
+  const auto begin = in.begin();
+  const auto end = in.end();
+  benchmark::DoNotOptimize(in);
+  // A plain char array is only guaranteed an alignment of 1; the storage must
+  // be aligned for Container before an object can be constructed in it.
+  alignas(Container) char buffer[sizeof(Container)];
+  for (auto _ : st) {
+    Container* c = std::construct_at(reinterpret_cast<Container*>(buffer), begin, end);
+    benchmark::DoNotOptimize(buffer);
+    st.PauseTiming();
+    std::destroy_at(c); // destroy through the pointer to the live object, outside the timed region
+    st.ResumeTiming();
+  }
+}
+
+#if TEST_STD_VER >= 23
+// Benchmarks `Container(std::from_range, in)`, where `in` is a std::vector
+// holding `st.range(0)` elements. Only compiled when TEST_STD_VER >= 23.
+//
+// The container is constructed into raw storage so that only the constructor
+// is timed: the matching destructor runs while the timer is paused.
+template <class Container>
+void BM_ctor_from_range(benchmark::State& st) {
+  using ValueType = typename Container::value_type;
+  const auto size = st.range(0);
+  std::vector<ValueType> in(size);
+  benchmark::DoNotOptimize(in);
+  // A plain char array is only guaranteed an alignment of 1; the storage must
+  // be aligned for Container before an object can be constructed in it.
+  alignas(Container) char buffer[sizeof(Container)];
+  for (auto _ : st) {
+    Container* c = std::construct_at(reinterpret_cast<Container*>(buffer), std::from_range, in);
+    benchmark::DoNotOptimize(buffer);
+    st.PauseTiming();
+    std::destroy_at(c); // destroy through the pointer to the live object, outside the timed region
+    st.ResumeTiming();
+  }
+}
+#endif
+
+// Benchmarks the copy constructor, copying from a container that was
+// constructed with `st.range(0)` elements.
+//
+// The copy is constructed into raw storage so that only the constructor is
+// timed: the matching destructor runs while the timer is paused.
+template <class Container>
+void BM_ctor_copy(benchmark::State& st) {
+  const auto size = st.range(0);
+  Container c(size);
+  // A plain char array is only guaranteed an alignment of 1; the storage must
+  // be aligned for Container before an object can be constructed in it.
+  alignas(Container) char buffer[sizeof(Container)];
+  for (auto _ : st) {
+    Container* copy = std::construct_at(reinterpret_cast<Container*>(buffer), c);
+    benchmark::DoNotOptimize(buffer);
+    st.PauseTiming();
+    std::destroy_at(copy); // destroy through the pointer to the live object, outside the timed region
+    st.ResumeTiming();
+  }
+}
+
+// Benchmarks copy assignment: on every iteration a container constructed with
+// `st.range(0)` elements is assigned to a container that was
+// default-constructed before the loop.
+template <class Container>
+void BM_assignment(benchmark::State& st) {
+  const auto count = st.range(0);
+  Container target;
+  Container source(count);
+  for (auto _ : st) {
+    target = source;
+    // Keep both sides of the assignment observable.
+    DoNotOptimizeData(target);
+    DoNotOptimizeData(source);
+  }
+}
+
+// Benchmarks `assign(first, last)` when the range is described by
+// cpp17_input_iterator wrappers around pointers into a std::vector holding
+// `st.range(0)` elements. The container already holds a copy of that vector
+// before the first iteration.
+template <typename Container>
+void BM_assign_inputiter(benchmark::State& st) {
+  using T = typename Container::value_type;
+  const auto count = st.range(0);
+  std::vector<T> source(count);
+  Container c(source.begin(), source.end());
+  DoNotOptimizeData(c);
+  DoNotOptimizeData(source);
+
+  // Raw pointers delimiting the source elements; wrapped on each iteration.
+  T* first = source.data();
+  T* last = source.data() + source.size();
+
+  for (auto _ : st) {
+    c.assign(cpp17_input_iterator(first), cpp17_input_iterator(last));
+    benchmark::ClobberMemory();
+  }
+}
+
+// Benchmarks inserting a single value at the beginning of a container that
+// initially holds `st.range(0)` elements. Each iteration also erases the last
+// element so that the container size is the same at the start of every
+// iteration.
+template <class Container>
+void BM_insert_start(benchmark::State& st) {
+  using T = typename Container::value_type;
+  const int n = st.range(0);
+  std::vector<T> seed(n);
+  Container c(seed.begin(), seed.end());
+  DoNotOptimizeData(c);
+
+  T element{};
+  benchmark::DoNotOptimize(element);
+
+  for (auto _ : st) {
+    c.insert(c.begin(), element);
+    DoNotOptimizeData(c);
+
+    // Drop the last element again so the container does not grow indefinitely.
+    c.erase(std::prev(c.end()));
+  }
+}
+
+// Benchmarks inserting a single value at offset `st.range(0) / 2` of a
+// container that initially holds `st.range(0)` elements. Each iteration also
+// erases the last element so that the container size is the same at the start
+// of every iteration.
+//
+// Constrained to containers with random-access iterators, since the insertion
+// point is computed with iterator arithmetic.
+template <class Container>
+  requires std::random_access_iterator<typename Container::iterator>
+void BM_insert_middle(benchmark::State& st) {
+  using T = typename Container::value_type;
+  const int n = st.range(0);
+  std::vector<T> seed(n);
+  Container c(seed.begin(), seed.end());
+  DoNotOptimizeData(c);
+
+  T element{};
+  benchmark::DoNotOptimize(element);
+
+  for (auto _ : st) {
+    // Random-access iterators are required for this offset to make sense.
+    c.insert(c.begin() + (n / 2), element);
+    DoNotOptimizeData(c);
+
+    // Drop the last element again so the container does not grow indefinitely.
+    c.erase(c.end() - 1);
+  }
+}
+
+// Benchmarks inserting a range of `st.range(0)` elements, described by
+// cpp17_input_iterator wrappers, at the beginning of a container whose
+// capacity was reserved up front for the initial elements plus the inserted
+// ones.
+//
+// The inserted elements are erased again with the timer paused, so every
+// iteration starts from the same container size.
+template <class Container>
+void BM_insert_input_iter_with_reserve_no_realloc(benchmark::State& st) {
+  using T = typename Container::value_type;
+  const int n = st.range(0);
+  std::vector<T> source(n);
+  const auto first = cpp17_input_iterator(source.begin());
+  const auto last = cpp17_input_iterator(source.end());
+
+  auto initial_size = 100; // arbitrary
+  Container c(initial_size);
+  c.reserve(initial_size + source.size()); // ensure no reallocation
+  for (auto _ : st) {
+    c.insert(c.begin(), first, last);
+    DoNotOptimizeData(c);
+
+    st.PauseTiming();
+    // Shrink back to the initial size so the container does not grow indefinitely.
+    c.erase(c.begin() + initial_size, c.end());
+    st.ResumeTiming();
+  }
+}
+
+template <class Container>
+void BM_insert_input_iter_with_reserve_half_filled(benchmark::State& st) {
----------------
winner245 wrote:
Sorry. I might have misread your code... Yes, it does insert `count / 2` elements during construction. I am trying to understand why your test leads to different performance measurements than mine. I can tell there are two major differences which might have caused the difference:
1. **Test Setup**
In my test, I first reserved space for `2*n` elements, and then assigned `n` elements to container `c`. So I am left with a free space of `n`. Next, I inserted `n + 10` elements, where the first `n` elements were inserted into the free space, and the extra 10 were inserted into a temporary `__split_buffer`. I have intentionally chosen `n + 10` to favor the scenario for the improvement in my PR. This setting tests reallocation without significantly affecting the performance due to the extra buffer. The primary improvement comes from avoiding unnecessary rotation and movement of ranges, which are irrelevant to the extra buffer. If the extra buffer takes too much time to process, it would offset my improvement. Therefore, under the specific choice of `n + a` for some constant `a = O(1)`, my performance improvement is maximized.
In your test, let’s denote `n = count / 2`. Your code first constructs a container `c` with `n` elements and then reserves space for `2n` elements. This is similar to my test so far. Then, you insert `2*n` elements, where the first `n` fit into the free space, and the remaining `n` require a `__split_buffer` of size `n` (compared to the constant `10` in my test). Hence, the buffer processing time in your test is **linear** (compared to **constant** in my test), which takes a significant portion of time.
2. **Input size** I noticed that the input size in your tests is 1024. I have used a much larger input size to obtain a reliable result.
In summary, my original tests were performed under a setting favorable to the proposed changes in my PR, and they used a much larger input size, which might have led to the performance difference.
> In contrast, your benchmark was reserving and then assigning, which will not take advantage of the additional capacity. That is because assignment will replace the underlying buffer, in this case effectively shrinking the vector. Do you agree?
For `std::deque`, yes. The assignment operator calls `__maybe_remove_back_spare()` to remove spare space under certain conditions, which might lead to shrinking of the container following assignment. For `vector`, assignment leads to reallocation only when the LHS and RHS vectors have incompatible allocators. If the allocators are compatible (as in our test), the assignment reuses the LHS's current space (the reserved `2n` space in my test). Hence, my tests work well for vector (as my PR dealt with vector), but might not for `deque`. Since your refactoring aims to generalize these tests for sequence containers, I agree that the changes you made are necessary. Please go ahead and apply the changes.
https://github.com/llvm/llvm-project/pull/119763
More information about the libcxx-commits
mailing list