[libcxx-commits] [libcxx] [libc++] Optimize vector growing of trivially relocatable types (PR #76657)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Sun Dec 31 05:28:14 PST 2023


https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/76657

```
--------------------------------------------------
Benchmark                           old        new
--------------------------------------------------
bm_grow<int>                    1354 ns    1301 ns
bm_grow<std::string>            5584 ns    3370 ns
bm_grow<std::unique_ptr<int>>   3506 ns    1994 ns
bm_grow<std::deque<int>>       27114 ns   27209 ns
```



>From 46b8f19719b6830d20866e85e61a339d8fcf1a44 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 31 Dec 2023 13:35:00 +0100
Subject: [PATCH] [libc++] Optimize vector growing of trivially relocatable
 types

---
 libcxx/benchmarks/vector_operations.bench.cpp | 16 ++++++++
 libcxx/docs/ReleaseNotes/18.rst               |  2 +
 libcxx/include/CMakeLists.txt                 |  1 +
 libcxx/include/__fwd/string.h                 |  6 +++
 .../__memory/uninitialized_algorithms.h       | 20 ++++++++++
 libcxx/include/__memory/unique_ptr.h          |  4 ++
 .../__type_traits/is_trivially_relocatable.h  | 32 +++++++++++++++
 libcxx/include/vector                         | 39 +++++++++++++------
 8 files changed, 109 insertions(+), 11 deletions(-)
 create mode 100644 libcxx/include/__type_traits/is_trivially_relocatable.h

diff --git a/libcxx/benchmarks/vector_operations.bench.cpp b/libcxx/benchmarks/vector_operations.bench.cpp
index 38b14c56756fba..d7d8ac4358aef4 100644
--- a/libcxx/benchmarks/vector_operations.bench.cpp
+++ b/libcxx/benchmarks/vector_operations.bench.cpp
@@ -1,6 +1,7 @@
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
+#include <deque>
 #include <functional>
 #include <vector>
 
@@ -41,4 +42,19 @@ BENCHMARK_CAPTURE(BM_ConstructFromRange, vector_string, std::vector<std::string>
 
 BENCHMARK_CAPTURE(BM_Pushback, vector_int, std::vector<int>{})->Arg(TestNumInputs);
 
+template <class T>
+void bm_grow(benchmark::State& state) {
+  for (auto _ : state) {
+    std::vector<T> vec;
+    benchmark::DoNotOptimize(vec);
+    for (size_t i = 0; i != 2048; ++i)
+      vec.emplace_back();
+    benchmark::DoNotOptimize(vec);
+  }
+}
+BENCHMARK(bm_grow<int>);
+BENCHMARK(bm_grow<std::string>);
+BENCHMARK(bm_grow<std::unique_ptr<int>>);
+BENCHMARK(bm_grow<std::deque<int>>);
+
 BENCHMARK_MAIN();
diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst
index fa60a581652d6a..412140bf2bdbce 100644
--- a/libcxx/docs/ReleaseNotes/18.rst
+++ b/libcxx/docs/ReleaseNotes/18.rst
@@ -69,6 +69,8 @@ Improvements and New Features
 - ``std::for_each`` has been optimized for segmented iterators like ``std::deque::iterator`` in C++23 and
   later, which can lead up to 40x performance improvements.
 
+- The performance of growing ``std::vector`` has been improved for trivially relocatable types.
+
 - The library now provides several hardening modes under which common cases of library undefined behavior will be turned
   into a reliable program termination. The ``fast`` hardening mode enables a set of security-critical checks with
   minimal runtime overhead; the ``extensive`` hardening mode additionally enables relatively cheap checks that catch
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 0fe3ab44d2466e..d0436aeda3d157 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -803,6 +803,7 @@ set(files
   __type_traits/is_trivially_lexicographically_comparable.h
   __type_traits/is_trivially_move_assignable.h
   __type_traits/is_trivially_move_constructible.h
+  __type_traits/is_trivially_relocatable.h
   __type_traits/is_unbounded_array.h
   __type_traits/is_union.h
   __type_traits/is_unsigned.h
diff --git a/libcxx/include/__fwd/string.h b/libcxx/include/__fwd/string.h
index 032132374de5ed..b811f4a5653306 100644
--- a/libcxx/include/__fwd/string.h
+++ b/libcxx/include/__fwd/string.h
@@ -12,6 +12,8 @@
 #include <__availability>
 #include <__config>
 #include <__fwd/memory_resource.h>
+#include <__type_traits/is_fundamental.h>
+#include <__type_traits/is_trivially_relocatable.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -45,6 +47,10 @@ class _LIBCPP_TEMPLATE_VIS allocator;
 template <class _CharT, class _Traits = char_traits<_CharT>, class _Allocator = allocator<_CharT> >
 class _LIBCPP_TEMPLATE_VIS basic_string;
 
+// TODO: This could be extended to also allow custom allocators and possibly custom chars and char traits
+template <class _CharT>
+struct __libcpp_is_trivially_relocatable<basic_string<_CharT>> : is_fundamental<_CharT> {};
+
 using string = basic_string<char>;
 
 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h
index 2a4ecf655be287..3a8364de01617f 100644
--- a/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/libcxx/include/__memory/uninitialized_algorithms.h
@@ -29,6 +29,7 @@
 #include <__type_traits/is_trivially_copy_constructible.h>
 #include <__type_traits/is_trivially_move_assignable.h>
 #include <__type_traits/is_trivially_move_constructible.h>
+#include <__type_traits/is_trivially_relocatable.h>
 #include <__type_traits/is_unbounded_array.h>
 #include <__type_traits/negation.h>
 #include <__type_traits/remove_const.h>
@@ -646,6 +647,25 @@ __uninitialized_allocator_move_if_noexcept(_Alloc&, _Iter1 __first1, _Iter1 __la
 }
 #endif // _LIBCPP_COMPILER_GCC
 
+template <class _Alloc, class _Tp>
+struct __allocator_has_trivial_destroy : _Not<__has_destroy<_Alloc, _Tp*>> {};
+
+template <class _Tp, class _Up>
+struct __allocator_has_trivial_destroy<allocator<_Tp>, _Up> : true_type {};
+
+template <class _Alloc, class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
+__uninitialized_allocator_relocate(_Alloc& __alloc, _Tp* __first, _Tp* __last, _Tp* __out) {
+  if (__libcpp_is_constant_evaluated() || !__libcpp_is_trivially_relocatable<_Tp>::value ||
+      !__allocator_has_trivial_move_construct<_Alloc, _Tp>::value ||
+      !__allocator_has_trivial_destroy<_Alloc, _Tp>::value) {
+    std::__uninitialized_allocator_move_if_noexcept(__alloc, __first, __last, __out);
+    std::__allocator_destroy(__alloc, __first, __last);
+  } else {
+    __builtin_memcpy(__out, __first, sizeof(_Tp) * (__last - __first));
+  }
+}
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___MEMORY_UNINITIALIZED_ALGORITHMS_H
diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h
index 7bf5e3c5e4e6b5..17f7adcdded29a 100644
--- a/libcxx/include/__memory/unique_ptr.h
+++ b/libcxx/include/__memory/unique_ptr.h
@@ -33,6 +33,7 @@
 #include <__type_traits/is_reference.h>
 #include <__type_traits/is_same.h>
 #include <__type_traits/is_swappable.h>
+#include <__type_traits/is_trivially_relocatable.h>
 #include <__type_traits/is_void.h>
 #include <__type_traits/remove_extent.h>
 #include <__type_traits/type_identity.h>
@@ -273,6 +274,9 @@ class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS unique_ptr {
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 void swap(unique_ptr& __u) _NOEXCEPT { __ptr_.swap(__u.__ptr_); }
 };
 
+template <class _Tp>
+struct __libcpp_is_trivially_relocatable<unique_ptr<_Tp>> : true_type {};
+
 template <class _Tp, class _Dp>
 class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS unique_ptr<_Tp[], _Dp> {
 public:
diff --git a/libcxx/include/__type_traits/is_trivially_relocatable.h b/libcxx/include/__type_traits/is_trivially_relocatable.h
new file mode 100644
index 00000000000000..4b95d972ebcf9d
--- /dev/null
+++ b/libcxx/include/__type_traits/is_trivially_relocatable.h
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_RELOCATABLE_H
+#define _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_RELOCATABLE_H
+
+#include <__config>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_trivial.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if __has_builtin(__is_trivially_relocatable)
+template <class _Tp>
+struct __libcpp_is_trivially_relocatable : integral_constant<bool, __is_trivially_relocatable(_Tp)> {};
+#else
+template <class _Tp>
+struct __libcpp_is_trivially_relocatable : is_trivial<_Tp> {};
+#endif
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_RELOCATABLE_H
diff --git a/libcxx/include/vector b/libcxx/include/vector
index 0098273a195ff8..1e368fdcb8d3dd 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -982,14 +982,18 @@ template <ranges::input_range _Range,
 vector(from_range_t, _Range&&, _Alloc = _Alloc()) -> vector<ranges::range_value_t<_Range>, _Alloc>;
 #endif
 
+// __swap_out_circular_buffer relocates the objects in [__begin_, __end_) into the front of __v and swaps the buffers of
+// *this and __v. It is assumed that __v provides space for exactly (__end_ - __begin_) objects in the front. This
+// function has a strong exception guarantee.
 template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v) {
   __annotate_delete();
-  using _RevIter = std::reverse_iterator<pointer>;
-  __v.__begin_   = std::__uninitialized_allocator_move_if_noexcept(
-                     __alloc(), _RevIter(__end_), _RevIter(__begin_), _RevIter(__v.__begin_))
-                     .base();
+  auto __new_begin = __v.__begin_ - (__end_ - __begin_);
+  std::__uninitialized_allocator_relocate(
+      __alloc(), std::__to_address(__begin_), std::__to_address(__end_), std::__to_address(__new_begin));
+  __v.__begin_ = __new_begin;
+  __end_       = __begin_; // All the objects have been destroyed by relocating them.
   std::swap(this->__begin_, __v.__begin_);
   std::swap(this->__end_, __v.__end_);
   std::swap(this->__end_cap(), __v.__end_cap());
@@ -997,22 +1001,35 @@ vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, a
   __annotate_new(size());
 }
 
+// __swap_out_circular_buffer relocates the objects in [__begin_, __p) into the front of __v, the objects in
+// [__p, __end_) into the back of __v and swaps the buffers of *this and __v. It is assumed that __v provides space for
+// exactly (__p - __begin_) objects in the front and space for at least (__end_ - __p) objects in the back. This
+// function has a strong exception guarantee if __begin_ == __p || __end_ == __p.
 template <class _Tp, class _Allocator>
 _LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer
 vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p) {
   __annotate_delete();
-  pointer __r    = __v.__begin_;
-  using _RevIter = std::reverse_iterator<pointer>;
-  __v.__begin_   = std::__uninitialized_allocator_move_if_noexcept(
-                     __alloc(), _RevIter(__p), _RevIter(__begin_), _RevIter(__v.__begin_))
-                     .base();
-  __v.__end_ = std::__uninitialized_allocator_move_if_noexcept(__alloc(), __p, __end_, __v.__end_);
+  pointer __ret = __v.__begin_;
+
+  // Relocate [__p, __end_) first to avoid having a hole in [__begin_, __end_)
+  // in case something in [__begin_, __p) throws.
+  std::__uninitialized_allocator_relocate(
+      __alloc(), std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.__end_));
+  __v.__end_ += (__end_ - __p);
+  __end_           = __p; // The objects in [__p, __end_) have been destroyed by relocating them.
+  auto __new_begin = __v.__begin_ - (__p - __begin_);
+
+  std::__uninitialized_allocator_relocate(
+      __alloc(), std::__to_address(__begin_), std::__to_address(__p), std::__to_address(__new_begin));
+  __v.__begin_ = __new_begin;
+  __end_       = __begin_; // All the objects have been destroyed by relocating them.
+
   std::swap(this->__begin_, __v.__begin_);
   std::swap(this->__end_, __v.__end_);
   std::swap(this->__end_cap(), __v.__end_cap());
   __v.__first_ = __v.__begin_;
   __annotate_new(size());
-  return __r;
+  return __ret;
 }
 
 template <class _Tp, class _Allocator>



More information about the libcxx-commits mailing list