[libcxx-commits] [libcxx] [libc++] Take advantage of trivial relocation in std::vector::erase (PR #116268)

Louis Dionne via libcxx-commits libcxx-commits at lists.llvm.org
Wed Nov 27 12:03:02 PST 2024


https://github.com/ldionne updated https://github.com/llvm/llvm-project/pull/116268

>From 9790cf13d3bbcdbea207ed14722a38fd309c7a08 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Mon, 18 Nov 2024 14:15:02 +0100
Subject: [PATCH 1/2] [libc++][WIP] Add relocation facilities that P2687 could
 provide

---
 libcxx/include/CMakeLists.txt                 |  3 +
 .../is_trivially_allocator_relocatable.h      | 49 ++++++++++
 libcxx/include/__memory/relocate_at.h         | 70 ++++++++++++++
 .../__memory/uninitialized_algorithms.h       | 21 +----
 .../include/__memory/uninitialized_relocate.h | 94 +++++++++++++++++++
 libcxx/include/__vector/vector.h              |  6 +-
 libcxx/include/module.modulemap               |  5 +
 7 files changed, 228 insertions(+), 20 deletions(-)
 create mode 100644 libcxx/include/__memory/is_trivially_allocator_relocatable.h
 create mode 100644 libcxx/include/__memory/relocate_at.h
 create mode 100644 libcxx/include/__memory/uninitialized_relocate.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 2bb6d263340d30..f73a9c9af3f279 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -551,18 +551,21 @@ set(files
   __memory/construct_at.h
   __memory/destruct_n.h
   __memory/inout_ptr.h
+  __memory/is_trivially_allocator_relocatable.h
   __memory/noexcept_move_assign_container.h
   __memory/out_ptr.h
   __memory/pointer_traits.h
   __memory/ranges_construct_at.h
   __memory/ranges_uninitialized_algorithms.h
   __memory/raw_storage_iterator.h
+  __memory/relocate_at.h
   __memory/shared_count.h
   __memory/shared_ptr.h
   __memory/swap_allocator.h
   __memory/temp_value.h
   __memory/temporary_buffer.h
   __memory/uninitialized_algorithms.h
+  __memory/uninitialized_relocate.h
   __memory/unique_ptr.h
   __memory/unique_temporary_buffer.h
   __memory/uses_allocator.h
diff --git a/libcxx/include/__memory/is_trivially_allocator_relocatable.h b/libcxx/include/__memory/is_trivially_allocator_relocatable.h
new file mode 100644
index 00000000000000..c8f46661993bc5
--- /dev/null
+++ b/libcxx/include/__memory/is_trivially_allocator_relocatable.h
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MEMORY_IS_TRIVIALLY_ALLOCATOR_RELOCATABLE_H
+#define _LIBCPP___MEMORY_IS_TRIVIALLY_ALLOCATOR_RELOCATABLE_H
+
+#include <__config>
+#include <__memory/allocator_traits.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_trivially_relocatable.h>
+#include <__type_traits/negation.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// A type is trivially allocator relocatable if the allocator's move construction and destruction
+// don't do anything beyond calling the type's move constructor and destructor, and if the type
+// itself is trivially relocatable.
+
+template <class _Alloc, class _Type>
+struct __allocator_has_trivial_move_construct : _Not<__has_construct<_Alloc, _Type*, _Type&&> > {};
+
+template <class _Type>
+struct __allocator_has_trivial_move_construct<allocator<_Type>, _Type> : true_type {};
+
+template <class _Alloc, class _Tp>
+struct __allocator_has_trivial_destroy : _Not<__has_destroy<_Alloc, _Tp*> > {};
+
+template <class _Tp, class _Up>
+struct __allocator_has_trivial_destroy<allocator<_Tp>, _Up> : true_type {};
+
+template <class _Alloc, class _Tp>
+struct __is_trivially_allocator_relocatable
+    : integral_constant<bool,
+                        __allocator_has_trivial_move_construct<_Alloc, _Tp>::value &&
+                            __allocator_has_trivial_destroy<_Alloc, _Tp>::value &&
+                            __libcpp_is_trivially_relocatable<_Tp>::value> {};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___MEMORY_IS_TRIVIALLY_ALLOCATOR_RELOCATABLE_H
diff --git a/libcxx/include/__memory/relocate_at.h b/libcxx/include/__memory/relocate_at.h
new file mode 100644
index 00000000000000..529210b5d75098
--- /dev/null
+++ b/libcxx/include/__memory/relocate_at.h
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MEMORY_RELOCATE_AT_H
+#define _LIBCPP___MEMORY_RELOCATE_AT_H
+
+#include <__memory/allocator_traits.h>
+#include <__memory/construct_at.h>
+#include <__memory/is_trivially_allocator_relocatable.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/is_constant_evaluated.h>
+#include <__type_traits/is_trivially_relocatable.h>
+#include <__utility/move.h>
+#include <__utility/scope_guard.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Tp>
+struct __destroy_object {
+  _LIBCPP_CONSTEXPR_SINCE_CXX14 void operator()() const { std::__destroy_at(__obj_); }
+  _Tp* __obj_;
+};
+
+template <class _Tp, __enable_if_t<!__libcpp_is_trivially_relocatable<_Tp>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __relocate_at(_Tp* __source, _Tp* __dest) {
+  __scope_guard<__destroy_object<_Tp> > __guard(__destroy_object<_Tp>{__source});
+  return std::__construct_at(__dest, std::move(*__source));
+}
+
+template <class _Tp, __enable_if_t<__libcpp_is_trivially_relocatable<_Tp>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __relocate_at(_Tp* __source, _Tp* __dest) {
+  if (__libcpp_is_constant_evaluated()) {
+    std::__construct_at(__dest, std::move(*__source));
+  } else {
+    // Casting to void* to suppress clang complaining that this is technically UB.
+    __builtin_memcpy(static_cast<void*>(__dest), __source, sizeof(_Tp));
+  }
+  return __dest;
+}
+
+template <class _Alloc, class _Tp, __enable_if_t<!__is_trivially_allocator_relocatable<_Alloc, _Tp>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
+__allocator_relocate_at(_Alloc& __alloc, _Tp* __source, _Tp* __dest) {
+  allocator_traits<_Alloc>::construct(__alloc, __dest, std::move(*__source));
+  allocator_traits<_Alloc>::destroy(__alloc, __source);
+  return __dest;
+}
+
+template <class _Alloc, class _Tp, __enable_if_t<__is_trivially_allocator_relocatable<_Alloc, _Tp>::value, int> = 0>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __allocator_relocate_at(_Alloc&, _Tp* __source, _Tp* __dest) {
+  return std::__relocate_at(__source, __dest);
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___MEMORY_RELOCATE_AT_H
diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h
index 71c7ed94fec13e..1462643075a593 100644
--- a/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/libcxx/include/__memory/uninitialized_algorithms.h
@@ -20,6 +20,7 @@
 #include <__memory/addressof.h>
 #include <__memory/allocator_traits.h>
 #include <__memory/construct_at.h>
+#include <__memory/is_trivially_allocator_relocatable.h>
 #include <__memory/pointer_traits.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/extent.h>
@@ -591,19 +592,7 @@ __uninitialized_allocator_copy(_Alloc& __alloc, _Iter1 __first1, _Sent1 __last1,
   return std::__rewrap_iter(__first2, __result);
 }
 
-template <class _Alloc, class _Type>
-struct __allocator_has_trivial_move_construct : _Not<__has_construct<_Alloc, _Type*, _Type&&> > {};
-
-template <class _Type>
-struct __allocator_has_trivial_move_construct<allocator<_Type>, _Type> : true_type {};
-
-template <class _Alloc, class _Tp>
-struct __allocator_has_trivial_destroy : _Not<__has_destroy<_Alloc, _Tp*> > {};
-
-template <class _Tp, class _Up>
-struct __allocator_has_trivial_destroy<allocator<_Tp>, _Up> : true_type {};
-
-// __uninitialized_allocator_relocate relocates the objects in [__first, __last) into __result.
+// __uninitialized_allocator_relocate_for_vector relocates the objects in [__first, __last) into __result.
 // Relocation means that the objects in [__first, __last) are placed into __result as-if by move-construct and destroy,
 // except that the move constructor and destructor may never be called if they are known to be equivalent to a memcpy.
 //
@@ -616,15 +605,13 @@ struct __allocator_has_trivial_destroy<allocator<_Tp>, _Up> : true_type {};
 // - is_copy_constructible<_ValueType>
 // - __libcpp_is_trivially_relocatable<_ValueType>
 template <class _Alloc, class _ContiguousIterator>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __uninitialized_allocator_relocate(
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __uninitialized_allocator_relocate_for_vector(
     _Alloc& __alloc, _ContiguousIterator __first, _ContiguousIterator __last, _ContiguousIterator __result) {
   static_assert(__libcpp_is_contiguous_iterator<_ContiguousIterator>::value, "");
   using _ValueType = typename iterator_traits<_ContiguousIterator>::value_type;
   static_assert(__is_cpp17_move_insertable<_Alloc>::value,
                 "The specified type does not meet the requirements of Cpp17MoveInsertable");
-  if (__libcpp_is_constant_evaluated() || !__libcpp_is_trivially_relocatable<_ValueType>::value ||
-      !__allocator_has_trivial_move_construct<_Alloc, _ValueType>::value ||
-      !__allocator_has_trivial_destroy<_Alloc, _ValueType>::value) {
+  if (__libcpp_is_constant_evaluated() || !__is_trivially_allocator_relocatable<_Alloc, _ValueType>::value) {
     auto __destruct_first = __result;
     auto __guard          = std::__make_exception_guard(
         _AllocatorDestroyRangeReverse<_Alloc, _ContiguousIterator>(__alloc, __destruct_first, __result));
diff --git a/libcxx/include/__memory/uninitialized_relocate.h b/libcxx/include/__memory/uninitialized_relocate.h
new file mode 100644
index 00000000000000..5cea24c8ce5e46
--- /dev/null
+++ b/libcxx/include/__memory/uninitialized_relocate.h
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MEMORY_UNINITIALIZED_RELOCATE_H
+#define _LIBCPP___MEMORY_UNINITIALIZED_RELOCATE_H
+
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__memory/allocator_traits.h>
+#include <__memory/is_trivially_allocator_relocatable.h>
+#include <__memory/pointer_traits.h>
+#include <__memory/relocate_at.h>
+#include <__type_traits/is_constant_evaluated.h>
+#include <__type_traits/is_nothrow_constructible.h>
+#include <__type_traits/is_trivially_relocatable.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// __uninitialized_relocate relocates the objects in [__first, __last) into __result.
+//
+// Relocation means that the objects in [__first, __last) are placed into __result as-if by move-construct and destroy,
+// except that the move constructor and destructor may never be called if they are known to be equivalent to a memcpy.
+//
+// This algorithm works even if part of the resulting range overlaps with [__first, __last), as long as __result itself
+// is not in [__first, last).
+//
+// This algorithm doesn't throw any exceptions. However, it requires the types in the range to be nothrow
+// move-constructible and the iterator operations not to throw any exceptions.
+//
+// Preconditions:
+//  - __result doesn't contain any objects and [__first, __last) contains objects
+//  - __result is not in [__first, __last)
+// Postconditions: __result contains the objects from [__first, __last) and
+//                 [__first, __last) doesn't contain any objects
+template <class _ContiguousIterator>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ContiguousIterator __uninitialized_relocate(
+    _ContiguousIterator __first, _ContiguousIterator __last, _ContiguousIterator __result) _NOEXCEPT {
+  using _ValueType = typename iterator_traits<_ContiguousIterator>::value_type;
+  static_assert(__libcpp_is_contiguous_iterator<_ContiguousIterator>::value, "");
+  static_assert(is_nothrow_move_constructible<_ValueType>::value, "");
+  if (!__libcpp_is_constant_evaluated() && __libcpp_is_trivially_relocatable<_ValueType>::value) {
+    auto const __n = __last - __first;
+    // Casting to void* to suppress clang complaining that this is technically UB.
+    __builtin_memmove(
+        static_cast<void*>(std::__to_address(__result)), std::__to_address(__first), sizeof(_ValueType) * __n);
+    return __result + __n;
+  } else {
+    while (__first != __last) {
+      std::__relocate_at(std::__to_address(__first), std::__to_address(__result));
+      ++__first;
+      ++__result;
+    }
+    return __result;
+  }
+}
+
+// Equivalent to __uninitialized_relocate, but uses the provided allocator's construct() and destroy() methods.
+template <class _Alloc, class _ContiguousIterator>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ContiguousIterator __uninitialized_allocator_relocate(
+    _Alloc& __alloc, _ContiguousIterator __first, _ContiguousIterator __last, _ContiguousIterator __result) _NOEXCEPT {
+  using _ValueType = typename iterator_traits<_ContiguousIterator>::value_type;
+  static_assert(__libcpp_is_contiguous_iterator<_ContiguousIterator>::value, "");
+  static_assert(is_nothrow_move_constructible<_ValueType>::value, "");
+  if (!__libcpp_is_constant_evaluated() && __is_trivially_allocator_relocatable<_Alloc, _ValueType>::value) {
+    (void)__alloc; // ignore the allocator
+    return std::__uninitialized_relocate(std::move(__first), std::move(__last), std::move(__result));
+  } else {
+    while (__first != __last) {
+      std::__allocator_relocate_at(__alloc, std::__to_address(__first), std::__to_address(__result));
+      ++__first;
+      ++__result;
+    }
+    return __result;
+  }
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___MEMORY_UNINITIALIZED_RELOCATE_H
diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 4b9ba24e97f65e..679fbb6a7b4960 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -806,7 +806,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v) {
   __annotate_delete();
   auto __new_begin = __v.__begin_ - (__end_ - __begin_);
-  std::__uninitialized_allocator_relocate(
+  std::__uninitialized_allocator_relocate_for_vector(
       this->__alloc_, std::__to_address(__begin_), std::__to_address(__end_), std::__to_address(__new_begin));
   __v.__begin_ = __new_begin;
   __end_       = __begin_; // All the objects have been destroyed by relocating them.
@@ -829,13 +829,13 @@ vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, a
 
   // Relocate [__p, __end_) first to avoid having a hole in [__begin_, __end_)
   // in case something in [__begin_, __p) throws.
-  std::__uninitialized_allocator_relocate(
+  std::__uninitialized_allocator_relocate_for_vector(
       this->__alloc_, std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.__end_));
   __v.__end_ += (__end_ - __p);
   __end_           = __p; // The objects in [__p, __end_) have been destroyed by relocating them.
   auto __new_begin = __v.__begin_ - (__p - __begin_);
 
-  std::__uninitialized_allocator_relocate(
+  std::__uninitialized_allocator_relocate_for_vector(
       this->__alloc_, std::__to_address(__begin_), std::__to_address(__p), std::__to_address(__new_begin));
   __v.__begin_ = __new_begin;
   __end_       = __begin_; // All the objects have been destroyed by relocating them.
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index ed2b7fb1921642..a591f38641a31b 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1535,6 +1535,7 @@ module std [system] {
     module destruct_n                         { header "__memory/destruct_n.h" }
     module fwd                                { header "__fwd/memory.h" }
     module inout_ptr                          { header "__memory/inout_ptr.h" }
+    module is_trivially_allocator_relocatable { header "__memory/is_trivially_allocator_relocatable.h" }
     module noexcept_move_assign_container     { header "__memory/noexcept_move_assign_container.h" }
     module out_ptr                            { header "__memory/out_ptr.h" }
     module pointer_traits                     { header "__memory/pointer_traits.h" }
@@ -1544,6 +1545,7 @@ module std [system] {
       export std.algorithm.in_out_result
     }
     module raw_storage_iterator               { header "__memory/raw_storage_iterator.h" }
+    module relocate_at                        { header "__memory/relocate_at.h" }
     module shared_count                       { header "__memory/shared_count.h" }
     module shared_ptr                         { header "__memory/shared_ptr.h" }
     module swap_allocator                     { header "__memory/swap_allocator.h" }
@@ -1556,6 +1558,9 @@ module std [system] {
       header "__memory/uninitialized_algorithms.h"
       export std.utility.pair
     }
+    module uninitialized_relocate {
+      header "__memory/uninitialized_relocate.h"
+    }
     module unique_ptr {
       header "__memory/unique_ptr.h"
     }

>From ac58253f9b14ca04830efcb585195b01f1bb4251 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2 at gmail.com>
Date: Thu, 14 Nov 2024 13:54:18 +0100
Subject: [PATCH 2/2] [libc++] Take advantage of trivial relocation in
 std::vector::erase

In vector::erase(iter) and vector::erase(iter, iter), we can take
advantage of a type being trivially relocatable to open up a gap
in the vector and then relocate the tail of the vector into that gap.

The benefit is that relocating an object is often more efficient than
move-assigning and then destroying the original object. For types that
can be relocated trivially but that are complicated enough for the
compiler not to optimize by itself (like std::string), this provides
around a 2x performance speedup in vector::erase (see below).

This optimization requires stopping the usage of Clang's __is_trivially_relocatable
builtin, which doesn't currently honour assignment operators like
is_trivially_copyable does and can lead us to perform incorrect
optimizations.

It is also worth noting that __uninitialized_allocator_relocate has to
be modified so that we can relocate into an overlapping range. This
has an unfortunate impact on its exception safety guarantees, which
needs to be investigated further.

   Previous implementation
   --------------------------------------------------------------------------------------
   Benchmark                                            Time             CPU   Iterations
   --------------------------------------------------------------------------------------
   BM_erase_iter_in_middle/vector_int/1024           24.9 ns         24.9 ns     28042962
   BM_erase_iter_in_middle/vector_int/4096            107 ns          107 ns      6590592
   BM_erase_iter_in_middle/vector_int/10240           271 ns          265 ns      2733478
   BM_erase_iter_in_middle/vector_string/1024         349 ns          349 ns      2005886
   BM_erase_iter_in_middle/vector_string/4096        1410 ns         1406 ns       498355
   BM_erase_iter_in_middle/vector_string/10240       3449 ns         3449 ns       201989
   BM_erase_iter_at_start/vector_int/1024            47.1 ns         47.1 ns     14836261
   BM_erase_iter_at_start/vector_int/4096             204 ns          204 ns      3430414
   BM_erase_iter_at_start/vector_int/10240            504 ns          504 ns      1391373
   BM_erase_iter_at_start/vector_string/1024          684 ns          684 ns      1025160
   BM_erase_iter_at_start/vector_string/4096         2855 ns         2806 ns       254080
   BM_erase_iter_at_start/vector_string/10240        7060 ns         7060 ns        94134

   New implementation
   --------------------------------------------------------------------------------------
   Benchmark                                            Time             CPU   Iterations
   --------------------------------------------------------------------------------------
   BM_erase_iter_in_middle/vector_int/1024           26.0 ns         25.9 ns     27127367
   BM_erase_iter_in_middle/vector_int/4096            105 ns          105 ns      6515204
   BM_erase_iter_in_middle/vector_int/10240           259 ns          258 ns      2800795
   BM_erase_iter_in_middle/vector_string/1024         148 ns          147 ns      4725706
   BM_erase_iter_in_middle/vector_string/4096         608 ns          606 ns      1168205
   BM_erase_iter_in_middle/vector_string/10240       1523 ns         1520 ns       459909
   BM_erase_iter_at_start/vector_int/1024            47.1 ns         47.1 ns     14762513
   BM_erase_iter_at_start/vector_int/4096             205 ns          205 ns      3403130
   BM_erase_iter_at_start/vector_int/10240            507 ns          507 ns      1382716
   BM_erase_iter_at_start/vector_string/1024          300 ns          300 ns      2327546
   BM_erase_iter_at_start/vector_string/4096         1205 ns         1205 ns       580855
   BM_erase_iter_at_start/vector_string/10240        4296 ns         4296 ns       162956
---
 libcxx/docs/ReleaseNotes/20.rst               |  3 +
 libcxx/include/__vector/vector.h              | 56 +++++++++++--------
 .../vector/vector.modifiers/common.h          | 22 --------
 .../vector.modifiers/erase_iter.pass.cpp      | 26 ---------
 .../vector.modifiers/erase_iter_iter.pass.cpp | 26 ---------
 5 files changed, 35 insertions(+), 98 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index d520c46bae1ef1..5e32524d73088b 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -52,6 +52,9 @@ Improvements and New Features
 - The ``lexicographical_compare`` and ``ranges::lexicographical_compare`` algorithms have been optimized for trivially
   equality comparable types, resulting in a performance improvement of up to 40x.
 
+- The ``std::vector::erase`` function has been optimized for types that can be relocated trivially (such as ``std::string``),
+  yielding speed ups witnessed to be around 2x for these types (but subject to the use case).
+
 - The ``_LIBCPP_ENABLE_CXX20_REMOVED_TEMPORARY_BUFFER`` macro has been added to make ``std::get_temporary_buffer`` and
   ``std::return_temporary_buffer`` available.
 
diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 679fbb6a7b4960..eba8cba19df0c2 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -34,11 +34,13 @@
 #include <__memory/allocator.h>
 #include <__memory/allocator_traits.h>
 #include <__memory/compressed_pair.h>
+#include <__memory/is_trivially_allocator_relocatable.h>
 #include <__memory/noexcept_move_assign_container.h>
 #include <__memory/pointer_traits.h>
 #include <__memory/swap_allocator.h>
 #include <__memory/temp_value.h>
 #include <__memory/uninitialized_algorithms.h>
+#include <__memory/uninitialized_relocate.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/container_compatible_range.h>
@@ -515,8 +517,36 @@ class _LIBCPP_TEMPLATE_VIS vector {
   }
 #endif
 
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position);
-  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last);
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) {
+    _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
+        __position != end(), "vector::erase(iterator) called with a non-dereferenceable iterator");
+    return erase(__position, __position + 1);
+  }
+  _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __cfirst, const_iterator __clast) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__cfirst <= __clast, "vector::erase(first, last) called with invalid range");
+
+    iterator __first = begin() + std::distance(cbegin(), __cfirst);
+    iterator __last  = begin() + std::distance(cbegin(), __clast);
+    if (__first == __last)
+      return __last;
+
+    auto __n = std::distance(__first, __last);
+
+    // If the value_type is nothrow move constructible, we destroy the range being erased and we
+    // relocate the tail of the vector into the created gap. This is especially efficient if the
+    // elements are trivially relocatable. Otherwise, we use the standard technique with move-assignments.
+    if constexpr (is_nothrow_move_constructible<value_type>::value) {
+      std::__allocator_destroy(this->__alloc_, __first, __last);
+      std::__uninitialized_allocator_relocate(this->__alloc_, __last, end(), __first);
+    } else {
+      auto __new_end = std::move(__last, end(), __first);
+      std::__allocator_destroy(this->__alloc_, __new_end, end());
+    }
+
+    this->__end_ -= __n;
+    __annotate_shrink(size() + __n);
+    return __first;
+  }
 
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT {
     size_type __old_size = size();
@@ -1108,28 +1138,6 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 inline
 #endif
 }
 
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI typename vector<_Tp, _Allocator>::iterator
-vector<_Tp, _Allocator>::erase(const_iterator __position) {
-  _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
-      __position != end(), "vector::erase(iterator) called with a non-dereferenceable iterator");
-  difference_type __ps = __position - cbegin();
-  pointer __p          = this->__begin_ + __ps;
-  this->__destruct_at_end(std::move(__p + 1, this->__end_, __p));
-  return __make_iter(__p);
-}
-
-template <class _Tp, class _Allocator>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::iterator
-vector<_Tp, _Allocator>::erase(const_iterator __first, const_iterator __last) {
-  _LIBCPP_ASSERT_VALID_INPUT_RANGE(__first <= __last, "vector::erase(first, last) called with invalid range");
-  pointer __p = this->__begin_ + (__first - begin());
-  if (__first != __last) {
-    this->__destruct_at_end(std::move(__p + (__last - __first), this->__end_, __p));
-  }
-  return __make_iter(__p);
-}
-
 template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 vector<_Tp, _Allocator>::__move_range(pointer __from_s, pointer __from_e, pointer __to) {
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/common.h b/libcxx/test/std/containers/sequences/vector/vector.modifiers/common.h
index 72cd47a50b2c0d..24627d9fe5f450 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/common.h
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/common.h
@@ -40,28 +40,6 @@ struct Throws {
 bool Throws::sThrows = false;
 #endif
 
-struct Tracker {
-  int copy_assignments = 0;
-  int move_assignments = 0;
-};
-
-struct TrackedAssignment {
-  Tracker* tracker_;
-  TEST_CONSTEXPR_CXX14 explicit TrackedAssignment(Tracker* tracker) : tracker_(tracker) {}
-
-  TrackedAssignment(TrackedAssignment const&) = default;
-  TrackedAssignment(TrackedAssignment&&)      = default;
-
-  TEST_CONSTEXPR_CXX14 TrackedAssignment& operator=(TrackedAssignment const&) {
-    tracker_->copy_assignments++;
-    return *this;
-  }
-  TEST_CONSTEXPR_CXX14 TrackedAssignment& operator=(TrackedAssignment&&) {
-    tracker_->move_assignments++;
-    return *this;
-  }
-};
-
 struct NonTriviallyRelocatable {
   int value_;
   TEST_CONSTEXPR NonTriviallyRelocatable() : value_(0) {}
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp
index f0157eb74b90f3..b2948ba5a46cd1 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter.pass.cpp
@@ -107,31 +107,5 @@ int main(int, char**) {
   }
 #endif
 
-  // Make sure we satisfy the complexity requirement in terms of the number of times the assignment
-  // operator is called.
-  //
-  // There is currently ambiguity as to whether this is truly mandated by the Standard, so we only
-  // test it for libc++.
-#ifdef _LIBCPP_VERSION
-  {
-    Tracker tracker;
-    std::vector<TrackedAssignment> v;
-
-    // Set up the vector with 5 elements.
-    for (int i = 0; i != 5; ++i) {
-      v.emplace_back(&tracker);
-    }
-    assert(tracker.copy_assignments == 0);
-    assert(tracker.move_assignments == 0);
-
-    // Erase element [1] from it. Elements [2] [3] [4] should be shifted, so we should
-    // see 3 move assignments (and nothing else).
-    v.erase(v.begin() + 1);
-    assert(v.size() == 4);
-    assert(tracker.copy_assignments == 0);
-    assert(tracker.move_assignments == 3);
-  }
-#endif
-
   return 0;
 }
diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp
index 104dfb4cb07d4f..fc00a720ece2f7 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/erase_iter_iter.pass.cpp
@@ -196,31 +196,5 @@ int main(int, char**) {
     assert(it == v.begin() + 2);
   }
 
-  // Make sure we satisfy the complexity requirement in terms of the number of times the assignment
-  // operator is called.
-  //
-  // There is currently ambiguity as to whether this is truly mandated by the Standard, so we only
-  // test it for libc++.
-#ifdef _LIBCPP_VERSION
-  {
-    Tracker tracker;
-    std::vector<TrackedAssignment> v;
-
-    // Set up the vector with 5 elements.
-    for (int i = 0; i != 5; ++i) {
-      v.emplace_back(&tracker);
-    }
-    assert(tracker.copy_assignments == 0);
-    assert(tracker.move_assignments == 0);
-
-    // Erase elements [1] and [2] from it. Elements [3] [4] should be shifted, so we should
-    // see 2 move assignments (and nothing else).
-    v.erase(v.begin() + 1, v.begin() + 3);
-    assert(v.size() == 3);
-    assert(tracker.copy_assignments == 0);
-    assert(tracker.move_assignments == 2);
-  }
-#endif
-
   return 0;
 }



More information about the libcxx-commits mailing list