[libcxx-commits] [libcxx] Optimize __insert_with_sentinel Function in std::vector (PR #113768)

Peng Liu via libcxx-commits libcxx-commits at lists.llvm.org
Thu Nov 7 14:11:22 PST 2024


https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/113768

>From ae7a50e6dd6b2515899b452a61b9dc6bb5b2548e Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Sat, 26 Oct 2024 13:35:48 -0400
Subject: [PATCH 1/2] Optimize __insert_with_sentinel Function in std::vector

---
 libcxx/include/__vector/vector.h              | 45 +++++++++----------
 .../insert_iter_iter_iter.pass.cpp            | 40 +++++++++++++++++
 2 files changed, 62 insertions(+), 23 deletions(-)

diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 6db202efb279b3..bc739efc729bde 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -1264,32 +1264,31 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI typename vector<_Tp, _Alloca
 vector<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __position, _InputIterator __first, _Sentinel __last) {
   difference_type __off = __position - begin();
   pointer __p           = this->__begin_ + __off;
-  allocator_type& __a   = this->__alloc();
+  allocator_type& __a   = this->__alloc_;
   pointer __old_last    = this->__end_;
-  for (; this->__end_ != this->__end_cap() && __first != __last; ++__first) {
+  for (; this->__end_ != this->__cap_ && __first != __last; ++__first)
     __construct_one_at_end(*__first);
+
+  if (__first == __last)
+    (void)std::rotate(__p, __old_last, this->__end_);
+  else {
+    __split_buffer<value_type, allocator_type&> __v(__a);
+    auto __guard = std::__make_exception_guard(
+        _AllocatorDestroyRangeReverse<allocator_type, pointer>(__a, __old_last, this->__end_));
+    __v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last));
+    __split_buffer<value_type, allocator_type&> __merged(__recommend(size() + __v.size()), __off, __a);
+    std::__uninitialized_allocator_relocate(
+        __a, std::__to_address(__old_last), std::__to_address(this->__end_), std::__to_address(__merged.__end_));
+    __guard.__complete(); // Release the guard once objects in [__old_last_, __end_) have been successfully relocated.
+    __merged.__end_ += this->__end_ - __old_last;
+    this->__end_ = __old_last;
+    std::__uninitialized_allocator_relocate(
+        __a, std::__to_address(__v.__begin_), std::__to_address(__v.__end_), std::__to_address(__merged.__end_));
+    __merged.__end_ += __v.size();
+    __v.__end_   = __v.__begin_;
+    __p          = __swap_out_circular_buffer(__merged, __p);
   }
-  __split_buffer<value_type, allocator_type&> __v(__a);
-  if (__first != __last) {
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      __v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last));
-      difference_type __old_size = __old_last - this->__begin_;
-      difference_type __old_p    = __p - this->__begin_;
-      reserve(__recommend(size() + __v.size()));
-      __p        = this->__begin_ + __old_p;
-      __old_last = this->__begin_ + __old_size;
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      erase(__make_iter(__old_last), end());
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
-  }
-  __p = std::rotate(__p, __old_last, this->__end_);
-  insert(__make_iter(__p), std::make_move_iterator(__v.begin()), std::make_move_iterator(__v.end()));
-  return begin() + __off;
+  return __make_iter(__p);
 }
 
 template <class _Tp, class _Allocator>
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp
index 934b85ce01c67b..8dce6e5c1a690e 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp
@@ -46,6 +46,46 @@ TEST_CONSTEXPR_CXX20 bool tests()
         for (; j < 105; ++j)
             assert(v[j] == 0);
     }
+    {   // Vector may or may not need to reallocate because of the insertion -- test both cases.
+      { // The input range is shorter than the remaining capacity of the vector -- ensure no reallocation happens.
+        typedef std::vector<int> V;
+        V v(100);
+        v.reserve(v.size() + 10);
+        int a[]     = {1, 2, 3, 4, 5};
+        const int N = sizeof(a) / sizeof(a[0]);
+        V::iterator i =
+            v.insert(v.cbegin() + 10, cpp17_input_iterator<const int*>(a), cpp17_input_iterator<const int*>(a + N));
+        assert(v.size() == 100 + N);
+        assert(is_contiguous_container_asan_correct(v));
+        assert(i == v.begin() + 10);
+        int j;
+        for (j = 0; j < 10; ++j)
+          assert(v[j] == 0);
+        for (std::size_t k = 0; k < N; ++j, ++k)
+          assert(v[j] == a[k]);
+        for (; j < 105; ++j)
+          assert(v[j] == 0);
+      }
+      { // The input range is longer than the remaining capacity of the vector -- ensure reallocation happens.
+        typedef std::vector<int> V;
+        V v(100);
+        v.reserve(v.size() + 2);
+        int a[]     = {1, 2, 3, 4, 5};
+        const int N = sizeof(a) / sizeof(a[0]);
+        V::iterator i =
+            v.insert(v.cbegin() + 10, cpp17_input_iterator<const int*>(a), cpp17_input_iterator<const int*>(a + N));
+        assert(v.size() == 100 + N);
+        assert(is_contiguous_container_asan_correct(v));
+        assert(i == v.begin() + 10);
+        int j;
+        for (j = 0; j < 10; ++j)
+          assert(v[j] == 0);
+        for (std::size_t k = 0; k < N; ++j, ++k)
+          assert(v[j] == a[k]);
+        for (; j < 105; ++j)
+          assert(v[j] == 0);
+      }
+    }
     {
         typedef std::vector<int> V;
         V v(100);

>From a31c874374982a268657fa3126a2343f7dcf4944 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Thu, 7 Nov 2024 17:09:30 -0500
Subject: [PATCH 2/2] Add benchmark tests

---
 libcxx/test/benchmarks/ContainerBenchmarks.h  | 84 +++++++++++++++++++
 libcxx/test/benchmarks/GenerateInput.h        |  7 ++
 .../benchmarks/vector_operations.bench.cpp    | 16 ++++
 3 files changed, 107 insertions(+)

diff --git a/libcxx/test/benchmarks/ContainerBenchmarks.h b/libcxx/test/benchmarks/ContainerBenchmarks.h
index 742c848328604c..b92f8fae1e8e8d 100644
--- a/libcxx/test/benchmarks/ContainerBenchmarks.h
+++ b/libcxx/test/benchmarks/ContainerBenchmarks.h
@@ -119,6 +119,90 @@ void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
   }
 }
 
+// Wrap any Iterator into an input iterator
+template <typename Iterator>
+class InputIterator {
+  using iter_traits = std::iterator_traits<Iterator>;
+
+public:
+  using iterator_category = std::input_iterator_tag;
+  using value_type        = typename iter_traits::value_type;
+  using difference_type   = typename iter_traits::difference_type;
+  using pointer           = typename iter_traits::pointer;
+  using reference         = typename iter_traits::reference;
+
+  InputIterator(Iterator it) : current_(it) {}
+
+  reference operator*() { return *current_; }
+  InputIterator& operator++() {
+    ++current_;
+    return *this;
+  }
+  InputIterator operator++(int) {
+    InputIterator tmp = *this;
+    ++(*this);
+    return tmp;
+  }
+
+  friend bool operator==(const InputIterator& lhs, const InputIterator& rhs) { return lhs.current_ == rhs.current_; }
+  friend bool operator!=(const InputIterator& lhs, const InputIterator& rhs) { return !(lhs == rhs); }
+
+private:
+  Iterator current_;
+};
+
+template <typename Iterator>
+InputIterator<Iterator> make_input_iterator(Iterator it) {
+  return InputIterator<Iterator>(it);
+}
+
+// Test case: vector is empty
+template <class Container, class GenInputs>
+void BM_InsertIterInputIterIterEmpty(benchmark::State& st, Container _, GenInputs gen) {
+  auto in = gen(st.range(0));
+  benchmark::DoNotOptimize(&in);
+  for (auto _ : st) {
+    Container c;
+    benchmark::DoNotOptimize(&c);
+    benchmark::DoNotOptimize(c.insert(c.begin(), make_input_iterator(in.begin()), make_input_iterator(in.end())));
+    benchmark::ClobberMemory();
+  }
+}
+
+// Test case: vector is half full
+template <class Container, class GenInputs>
+void BM_InsertIterInputIterIterHalfFull(benchmark::State& st, Container c, GenInputs gen) {
+  for (auto _ : st) {
+    st.PauseTiming();
+    c = gen(st.range(0));
+    c.reserve(c.size() * 2);
+    auto in = gen(c.size() + 10);
+    benchmark::DoNotOptimize(&c);
+    benchmark::DoNotOptimize(&in);
+    st.ResumeTiming();
+
+    benchmark::DoNotOptimize(c.insert(c.begin(), make_input_iterator(in.begin()), make_input_iterator(in.end())));
+    benchmark::ClobberMemory();
+  }
+}
+
+// Test case: vector is almost full
+template <class Container, class GenInputs>
+void BM_InsertIterInputIterIterFull(benchmark::State& st, Container c, GenInputs gen) {
+  for (auto _ : st) {
+    st.PauseTiming();
+    c = gen(st.range(0));
+    c.reserve(c.size() + 5);
+    auto in = gen(10);
+    benchmark::DoNotOptimize(&c);
+    benchmark::DoNotOptimize(&in);
+    st.ResumeTiming();
+
+    benchmark::DoNotOptimize(c.insert(c.begin(), make_input_iterator(in.begin()), make_input_iterator(in.end())));
+    benchmark::ClobberMemory();
+  }
+}
+
 template <class Container, class GenInputs>
 void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
   auto in        = gen(st.range(0));
diff --git a/libcxx/test/benchmarks/GenerateInput.h b/libcxx/test/benchmarks/GenerateInput.h
index cc1694311473ed..66e43ee36f4546 100644
--- a/libcxx/test/benchmarks/GenerateInput.h
+++ b/libcxx/test/benchmarks/GenerateInput.h
@@ -119,6 +119,13 @@ inline std::vector<std::string> getRandomStringInputs(size_t N) {
   return inputs;
 }
 
+inline std::vector<std::string> getShortRandomStringInputs(size_t N) {
+  std::vector<std::string> inputs;
+  for (size_t i = 0; i < N; ++i)
+    inputs.push_back(getRandomString(10)); // SSO
+  return inputs;
+}
+
 inline std::vector<std::string> getPrefixedRandomStringInputs(size_t N) {
   std::vector<std::string> inputs;
   constexpr int kSuffixLength = 32;
diff --git a/libcxx/test/benchmarks/vector_operations.bench.cpp b/libcxx/test/benchmarks/vector_operations.bench.cpp
index b0dffe35ec6e14..bc73af699dc81d 100644
--- a/libcxx/test/benchmarks/vector_operations.bench.cpp
+++ b/libcxx/test/benchmarks/vector_operations.bench.cpp
@@ -67,4 +67,20 @@ BENCHMARK(bm_grow<std::string>);
 BENCHMARK(bm_grow<std::unique_ptr<int>>);
 BENCHMARK(bm_grow<std::deque<int>>);
 
+BENCHMARK_CAPTURE(BM_InsertIterInputIterIterEmpty, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
+    ->Arg(TestNumInputs);
+BENCHMARK_CAPTURE(BM_InsertIterInputIterIterHalfFull, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
+    ->Arg(TestNumInputs);
+BENCHMARK_CAPTURE(BM_InsertIterInputIterIterFull, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
+    ->Arg(TestNumInputs);
+
+BENCHMARK_CAPTURE(
+    BM_InsertIterInputIterIterEmpty, vector_string, std::vector<std::string>{}, getShortRandomStringInputs)
+    ->Arg(TestNumInputs);
+BENCHMARK_CAPTURE(
+    BM_InsertIterInputIterIterHalfFull, vector_string, std::vector<std::string>{}, getShortRandomStringInputs)
+    ->Arg(TestNumInputs);
+BENCHMARK_CAPTURE(BM_InsertIterInputIterIterFull, vector_string, std::vector<std::string>{}, getShortRandomStringInputs)
+    ->Arg(TestNumInputs);
+
 BENCHMARK_MAIN();



More information about the libcxx-commits mailing list