[libcxx-commits] [libcxx] [libc++] Optimize vector growing of trivially relocatable types (PR #76657)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Dec 31 05:28:14 PST 2023
https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/76657
```
--------------------------------------------------
Benchmark old new
--------------------------------------------------
bm_grow<int> 1354 ns 1301 ns
bm_grow<std::string> 5584 ns 3370 ns
bm_grow<std::unique_ptr<int>> 3506 ns 1994 ns
bm_grow<std::deque<int>> 27114 ns 27209 ns
```
>From 46b8f19719b6830d20866e85e61a339d8fcf1a44 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Sun, 31 Dec 2023 13:35:00 +0100
Subject: [PATCH] [libc++] Optimize vector growing of trivially relocatable
types
---
libcxx/benchmarks/vector_operations.bench.cpp | 16 ++++++++
libcxx/docs/ReleaseNotes/18.rst | 2 +
libcxx/include/CMakeLists.txt | 1 +
libcxx/include/__fwd/string.h | 6 +++
.../__memory/uninitialized_algorithms.h | 20 ++++++++++
libcxx/include/__memory/unique_ptr.h | 4 ++
.../__type_traits/is_trivially_relocatable.h | 32 +++++++++++++++
libcxx/include/vector | 39 +++++++++++++------
8 files changed, 109 insertions(+), 11 deletions(-)
create mode 100644 libcxx/include/__type_traits/is_trivially_relocatable.h
diff --git a/libcxx/benchmarks/vector_operations.bench.cpp b/libcxx/benchmarks/vector_operations.bench.cpp
index 38b14c56756fba..d7d8ac4358aef4 100644
--- a/libcxx/benchmarks/vector_operations.bench.cpp
+++ b/libcxx/benchmarks/vector_operations.bench.cpp
@@ -1,6 +1,7 @@
#include <cstdint>
#include <cstdlib>
#include <cstring>
+#include <deque>
#include <functional>
#include <vector>
@@ -41,4 +42,19 @@ BENCHMARK_CAPTURE(BM_ConstructFromRange, vector_string, std::vector<std::string>
BENCHMARK_CAPTURE(BM_Pushback, vector_int, std::vector<int>{})->Arg(TestNumInputs);
+template <class T>
+void bm_grow(benchmark::State& state) {
+ for (auto _ : state) {
+ std::vector<T> vec;
+ benchmark::DoNotOptimize(vec);
+ for (size_t i = 0; i != 2048; ++i)
+ vec.emplace_back();
+ benchmark::DoNotOptimize(vec);
+ }
+}
+BENCHMARK(bm_grow<int>);
+BENCHMARK(bm_grow<std::string>);
+BENCHMARK(bm_grow<std::unique_ptr<int>>);
+BENCHMARK(bm_grow<std::deque<int>>);
+
BENCHMARK_MAIN();
diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst
index fa60a581652d6a..412140bf2bdbce 100644
--- a/libcxx/docs/ReleaseNotes/18.rst
+++ b/libcxx/docs/ReleaseNotes/18.rst
@@ -69,6 +69,8 @@ Improvements and New Features
- ``std::for_each`` has been optimized for segmented iterators like ``std::deque::iterator`` in C++23 and
later, which can lead up to 40x performance improvements.
+- The performance of growing ``std::vector`` has been improved for trivially relocatable types.
+
- The library now provides several hardening modes under which common cases of library undefined behavior will be turned
into a reliable program termination. The ``fast`` hardening mode enables a set of security-critical checks with
minimal runtime overhead; the ``extensive`` hardening mode additionally enables relatively cheap checks that catch
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 0fe3ab44d2466e..d0436aeda3d157 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -803,6 +803,7 @@ set(files
__type_traits/is_trivially_lexicographically_comparable.h
__type_traits/is_trivially_move_assignable.h
__type_traits/is_trivially_move_constructible.h
+ __type_traits/is_trivially_relocatable.h
__type_traits/is_unbounded_array.h
__type_traits/is_union.h
__type_traits/is_unsigned.h
diff --git a/libcxx/include/__fwd/string.h b/libcxx/include/__fwd/string.h
index 032132374de5ed..b811f4a5653306 100644
--- a/libcxx/include/__fwd/string.h
+++ b/libcxx/include/__fwd/string.h
@@ -12,6 +12,8 @@
#include <__availability>
#include <__config>
#include <__fwd/memory_resource.h>
+#include <__type_traits/is_fundamental.h>
+#include <__type_traits/is_trivially_relocatable.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -45,6 +47,10 @@ class _LIBCPP_TEMPLATE_VIS allocator;
template <class _CharT, class _Traits = char_traits<_CharT>, class _Allocator = allocator<_CharT> >
class _LIBCPP_TEMPLATE_VIS basic_string;
+// TODO: This could be extended to also allow custom allocators and possibly custom chars and char traits
+template <class _CharT>
+struct __libcpp_is_trivially_relocatable<basic_string<_CharT>> : is_fundamental<_CharT> {};
+
using string = basic_string<char>;
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h
index 2a4ecf655be287..3a8364de01617f 100644
--- a/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/libcxx/include/__memory/uninitialized_algorithms.h
@@ -29,6 +29,7 @@
#include <__type_traits/is_trivially_copy_constructible.h>
#include <__type_traits/is_trivially_move_assignable.h>
#include <__type_traits/is_trivially_move_constructible.h>
+#include <__type_traits/is_trivially_relocatable.h>
#include <__type_traits/is_unbounded_array.h>
#include <__type_traits/negation.h>
#include <__type_traits/remove_const.h>
@@ -646,6 +647,25 @@ __uninitialized_allocator_move_if_noexcept(_Alloc&, _Iter1 __first1, _Iter1 __la
}
#endif // _LIBCPP_COMPILER_GCC
+template <class _Alloc, class _Tp>
+struct __allocator_has_trivial_destroy : _Not<__has_destroy<_Alloc, _Tp*>> {};
+
+template <class _Tp, class _Up>
+struct __allocator_has_trivial_destroy<allocator<_Tp>, _Up> : true_type {};
+
+template <class _Alloc, class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void
+__uninitialized_allocator_relocate(_Alloc& __alloc, _Tp* __first, _Tp* __last, _Tp* __out) {
+ if (__libcpp_is_constant_evaluated() || !__libcpp_is_trivially_relocatable<_Tp>::value ||
+ !__allocator_has_trivial_move_construct<_Alloc, _Tp>::value ||
+ !__allocator_has_trivial_destroy<_Alloc, _Tp>::value) {
+ std::__uninitialized_allocator_move_if_noexcept(__alloc, __first, __last, __out);
+ std::__allocator_destroy(__alloc, __first, __last);
+ } else {
+ __builtin_memcpy(__out, __first, sizeof(_Tp) * (__last - __first));
+ }
+}
+
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___MEMORY_UNINITIALIZED_ALGORITHMS_H
diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h
index 7bf5e3c5e4e6b5..17f7adcdded29a 100644
--- a/libcxx/include/__memory/unique_ptr.h
+++ b/libcxx/include/__memory/unique_ptr.h
@@ -33,6 +33,7 @@
#include <__type_traits/is_reference.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_swappable.h>
+#include <__type_traits/is_trivially_relocatable.h>
#include <__type_traits/is_void.h>
#include <__type_traits/remove_extent.h>
#include <__type_traits/type_identity.h>
@@ -273,6 +274,9 @@ class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS unique_ptr {
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 void swap(unique_ptr& __u) _NOEXCEPT { __ptr_.swap(__u.__ptr_); }
};
+template <class _Tp>
+struct __libcpp_is_trivially_relocatable<unique_ptr<_Tp>> : true_type {};
+
template <class _Tp, class _Dp>
class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS unique_ptr<_Tp[], _Dp> {
public:
diff --git a/libcxx/include/__type_traits/is_trivially_relocatable.h b/libcxx/include/__type_traits/is_trivially_relocatable.h
new file mode 100644
index 00000000000000..4b95d972ebcf9d
--- /dev/null
+++ b/libcxx/include/__type_traits/is_trivially_relocatable.h
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_RELOCATABLE_H
+#define _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_RELOCATABLE_H
+
+#include <__config>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_trivial.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if __has_builtin(__is_trivially_relocatable)
+template <class _Tp>
+struct __libcpp_is_trivially_relocatable : integral_constant<bool, __is_trivially_relocatable(_Tp)> {};
+#else
+template <class _Tp>
+struct __libcpp_is_trivially_relocatable : is_trivial<_Tp> {};
+#endif
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___TYPE_TRAITS_IS_TRIVIALLY_RELOCATABLE_H
diff --git a/libcxx/include/vector b/libcxx/include/vector
index 0098273a195ff8..1e368fdcb8d3dd 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -982,14 +982,18 @@ template <ranges::input_range _Range,
vector(from_range_t, _Range&&, _Alloc = _Alloc()) -> vector<ranges::range_value_t<_Range>, _Alloc>;
#endif
+// __swap_out_circular_buffer relocates the objects in [__begin_, __end_) into the front of __v and swaps the buffers of
+// *this and __v. It is assumed that __v provides space for exactly (__end_ - __begin_) objects in the front. This
+// function has a strong exception guarantee.
template <class _Tp, class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 void
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v) {
__annotate_delete();
- using _RevIter = std::reverse_iterator<pointer>;
- __v.__begin_ = std::__uninitialized_allocator_move_if_noexcept(
- __alloc(), _RevIter(__end_), _RevIter(__begin_), _RevIter(__v.__begin_))
- .base();
+ auto __new_begin = __v.__begin_ - (__end_ - __begin_);
+ std::__uninitialized_allocator_relocate(
+ __alloc(), std::__to_address(__begin_), std::__to_address(__end_), std::__to_address(__new_begin));
+ __v.__begin_ = __new_begin;
+ __end_ = __begin_; // All the objects have been destroyed by relocating them.
std::swap(this->__begin_, __v.__begin_);
std::swap(this->__end_, __v.__end_);
std::swap(this->__end_cap(), __v.__end_cap());
@@ -997,22 +1001,35 @@ vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, a
__annotate_new(size());
}
+// __swap_out_circular_buffer relocates the objects in [__begin_, __p) into the front of __v, the objects in
+// [__p, __end_) into the back of __v and swaps the buffers of *this and __v. It is assumed that __v provides space for
+// exactly (__p - __begin_) objects in the front and space for at least (__end_ - __p) objects in the back. This
+// function has a strong exception guarantee if __begin_ == __p || __end_ == __p.
template <class _Tp, class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer
vector<_Tp, _Allocator>::__swap_out_circular_buffer(__split_buffer<value_type, allocator_type&>& __v, pointer __p) {
__annotate_delete();
- pointer __r = __v.__begin_;
- using _RevIter = std::reverse_iterator<pointer>;
- __v.__begin_ = std::__uninitialized_allocator_move_if_noexcept(
- __alloc(), _RevIter(__p), _RevIter(__begin_), _RevIter(__v.__begin_))
- .base();
- __v.__end_ = std::__uninitialized_allocator_move_if_noexcept(__alloc(), __p, __end_, __v.__end_);
+ pointer __ret = __v.__begin_;
+
+ // Relocate [__p, __end_) first to avoid having a hole in [__begin_, __end_)
+ // in case something in [__begin_, __p) throws.
+ std::__uninitialized_allocator_relocate(
+ __alloc(), std::__to_address(__p), std::__to_address(__end_), std::__to_address(__v.__end_));
+ __v.__end_ += (__end_ - __p);
+ __end_ = __p; // The objects in [__p, __end_) have been destroyed by relocating them.
+ auto __new_begin = __v.__begin_ - (__p - __begin_);
+
+ std::__uninitialized_allocator_relocate(
+ __alloc(), std::__to_address(__begin_), std::__to_address(__p), std::__to_address(__new_begin));
+ __v.__begin_ = __new_begin;
+ __end_ = __begin_; // All the objects have been destroyed by relocating them.
+
std::swap(this->__begin_, __v.__begin_);
std::swap(this->__end_, __v.__end_);
std::swap(this->__end_cap(), __v.__end_cap());
__v.__first_ = __v.__begin_;
__annotate_new(size());
- return __r;
+ return __ret;
}
template <class _Tp, class _Allocator>
More information about the libcxx-commits
mailing list