[libcxx-commits] [libcxx] Optimize __insert_with_sentinel Function in std::vector (PR #113768)

Peng Liu via libcxx-commits libcxx-commits at lists.llvm.org
Sat Oct 26 10:38:47 PDT 2024


https://github.com/winner245 created https://github.com/llvm/llvm-project/pull/113768

This PR includes optimizations and enhancements to the `__insert_with_sentinel` function in the `std::vector` implementation, focusing on performance improvements and better exception handling.

**Details**:

1. **Performance Improvement**:
   - The existing implementation triggers a `reserve()` operation, causing a round of memory relocation, followed by an additional `std::rotate` operation, and finally another memory relocation for elements after the insertion point.
   - The improved version eliminates redundant operations by directly relocating both existing and new elements to their final positions within the allocated `__split_buffer`.
   - This optimization reduces the total cost from `2 relocations + 1 std::rotate` to `1 relocation` without the need for `std::rotate`, improving the overall performance. Note that `std::rotate` is only needed when the vector has enough space for insertion where reallocation does not happen.

2. **Exception Safety**:
   - Replaced the traditional `try-catch` mechanism with the modern approach of exception guard. 
 

**Testing**:
New test cases are added in `libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp`, which check both cases where reallocation happens or not. Passed all existing libcxx tests for `std::vector` based on my local testing.


>From be9ddb6f6a3e80d2731fac7c5afca12108fa6c98 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Sat, 26 Oct 2024 13:35:48 -0400
Subject: [PATCH] Optimize __insert_with_sentinel Function in std::vector

---
 libcxx/include/__vector/vector.h              | 40 +++++++++----------
 .../insert_iter_iter_iter.pass.cpp            | 40 +++++++++++++++++++
 2 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 7889e8c2201ac1..02f91b537c7e22 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -1360,27 +1360,27 @@ vector<_Tp, _Allocator>::__insert_with_sentinel(const_iterator __position, _Inpu
   for (; this->__end_ != this->__end_cap() && __first != __last; ++__first) {
     __construct_one_at_end(*__first);
   }
-  __split_buffer<value_type, allocator_type&> __v(__a);
-  if (__first != __last) {
-#if _LIBCPP_HAS_EXCEPTIONS
-    try {
-#endif // _LIBCPP_HAS_EXCEPTIONS
-      __v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last));
-      difference_type __old_size = __old_last - this->__begin_;
-      difference_type __old_p    = __p - this->__begin_;
-      reserve(__recommend(size() + __v.size()));
-      __p        = this->__begin_ + __old_p;
-      __old_last = this->__begin_ + __old_size;
-#if _LIBCPP_HAS_EXCEPTIONS
-    } catch (...) {
-      erase(__make_iter(__old_last), end());
-      throw;
-    }
-#endif // _LIBCPP_HAS_EXCEPTIONS
+
+  if (__first == __last)
+    (void)std::rotate(__p, __old_last, this->__end_);
+  else {
+    __split_buffer<value_type, allocator_type&> __v(__a);
+    auto __guard =
+        std::__make_exception_guard(_AllocatorDestroyRangeReverse<allocator_type, pointer>(__a, __old_last, __end_));
+    __v.__construct_at_end_with_sentinel(std::move(__first), std::move(__last));
+    __split_buffer<value_type, allocator_type&> __merged(__recommend(size() + __v.size()), __off, __a);
+    std::__uninitialized_allocator_relocate(
+        __a, std::__to_address(__old_last), std::__to_address(__end_), std::__to_address(__merged.__end_));
+    __merged.__end_ += __end_ - __old_last;
+    __end_ = __old_last;
+    __guard.__complete();
+    std::__uninitialized_allocator_relocate(
+        __a, std::__to_address(__v.__begin_), std::__to_address(__v.__end_), std::__to_address(__merged.__end_));
+    __merged.__end_ += __v.size();
+    __v.__begin_ = __v.__end_;
+    __p          = __swap_out_circular_buffer(__merged, __p);
   }
-  __p = std::rotate(__p, __old_last, this->__end_);
-  insert(__make_iter(__p), std::make_move_iterator(__v.begin()), std::make_move_iterator(__v.end()));
-  return begin() + __off;
+  return __make_iter(__p);
 }
 
 template <class _Tp, class _Allocator>
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp
index 934b85ce01c67b..8dce6e5c1a690e 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp
@@ -46,6 +46,46 @@ TEST_CONSTEXPR_CXX20 bool tests()
         for (; j < 105; ++j)
             assert(v[j] == 0);
     }
+    {   // Vector may or may not need to reallocate because of the insertion -- test both cases.
+      { // The input range is shorter than the remaining capacity of the vector -- ensure no reallocation happens.
+        typedef std::vector<int> V;
+        V v(100);
+        v.reserve(v.size() + 10);
+        int a[]     = {1, 2, 3, 4, 5};
+        const int N = sizeof(a) / sizeof(a[0]);
+        V::iterator i =
+            v.insert(v.cbegin() + 10, cpp17_input_iterator<const int*>(a), cpp17_input_iterator<const int*>(a + N));
+        assert(v.size() == 100 + N);
+        assert(is_contiguous_container_asan_correct(v));
+        assert(i == v.begin() + 10);
+        int j;
+        for (j = 0; j < 10; ++j)
+          assert(v[j] == 0);
+        for (std::size_t k = 0; k < N; ++j, ++k)
+          assert(v[j] == a[k]);
+        for (; j < 105; ++j)
+          assert(v[j] == 0);
+      }
+      { // The input range is longer than the remaining capacity of the vector -- ensure reallocation happens.
+        typedef std::vector<int> V;
+        V v(100);
+        v.reserve(v.size() + 2);
+        int a[]     = {1, 2, 3, 4, 5};
+        const int N = sizeof(a) / sizeof(a[0]);
+        V::iterator i =
+            v.insert(v.cbegin() + 10, cpp17_input_iterator<const int*>(a), cpp17_input_iterator<const int*>(a + N));
+        assert(v.size() == 100 + N);
+        assert(is_contiguous_container_asan_correct(v));
+        assert(i == v.begin() + 10);
+        int j;
+        for (j = 0; j < 10; ++j)
+          assert(v[j] == 0);
+        for (std::size_t k = 0; k < N; ++j, ++k)
+          assert(v[j] == a[k]);
+        for (; j < 105; ++j)
+          assert(v[j] == 0);
+      }
+    }
     {
         typedef std::vector<int> V;
         V v(100);



More information about the libcxx-commits mailing list