[libcxx-commits] [libcxx] [libc++] Mark __{emplace, push}_back_slow_path as noinline (PR #94379)
Louis Dionne via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Oct 23 09:37:51 PDT 2024
ldionne wrote:
We just discussed this issue with @philnik777 and went through several attempted solutions. First, we wanted to make the code look like the diff below, which is the simplest option from a readability point of view. By using `[[unlikely]]` on the "reallocate" branch, we tell the optimizer to keep the slow `__reserve_for_real()` call outlined instead of inlining it into `emplace_back`.
Unfortunately, this rewrite doesn't work because it breaks `v.emplace_back(v.front())`: reserving first moves the existing elements into a new buffer, which invalidates the reference to `v.front()` before the new element has been constructed from it. To support that case, we need to allocate the new buffer, emplace the new element into it while the old elements are still in place, and only then move the existing elements over. There's just no way to reorganize things to simplify that ordering. It's sad, because it would have been a nice simplification. (A standalone illustration of the aliasing case follows the diff below.)
```diff
diff --git a/libcxx/include/vector b/libcxx/include/vector
index bfbf1ea1cfc9..8d4e32866675 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -656,6 +656,7 @@ public:
return this->__begin_ == this->__end_;
}
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT;
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __reserve_for_real(size_type __n);
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void reserve(size_type __n);
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void shrink_to_fit() _NOEXCEPT;
@@ -901,9 +902,6 @@ private:
template <class _Up>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI inline pointer __push_back_slow_path(_Up&& __x);
- template <class... _Args>
- _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI inline pointer __emplace_back_slow_path(_Args&&... __args);
-
// The following functions are no-ops outside of AddressSanitizer mode.
// We call annotations for every allocator, unless explicitly disabled.
//
@@ -1460,14 +1458,21 @@ vector<_Tp, _Allocator>::at(size_type __n) const {
return this->__begin_[__n];
}
+// This one assumes that we don't have enough capacity
+// It allows callers to inline the capacity check
+template <class _Tp, class _Allocator>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::__reserve_for_real(size_type __n) {
+ if (__n > max_size())
+ this->__throw_length_error();
+ allocator_type& __a = this->__alloc();
+ __split_buffer<value_type, allocator_type&> __v(__n, size(), __a);
+ __swap_out_circular_buffer(__v);
+}
+
template <class _Tp, class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 void vector<_Tp, _Allocator>::reserve(size_type __n) {
if (__n > capacity()) {
- if (__n > max_size())
- this->__throw_length_error();
- allocator_type& __a = this->__alloc();
- __split_buffer<value_type, allocator_type&> __v(__n, size(), __a);
- __swap_out_circular_buffer(__v);
+ __reserve_for_real(__n);
}
}
@@ -1529,19 +1534,6 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 inline _LIBCPP_HIDE_FROM_ABI void vector<_Tp, _All
this->__end_ = __end;
}
-template <class _Tp, class _Allocator>
-template <class... _Args>
-_LIBCPP_CONSTEXPR_SINCE_CXX20 typename vector<_Tp, _Allocator>::pointer
-vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) {
- allocator_type& __a = this->__alloc();
- __split_buffer<value_type, allocator_type&> __v(__recommend(size() + 1), size(), __a);
- // __v.emplace_back(std::forward<_Args>(__args)...);
- __alloc_traits::construct(__a, std::__to_address(__v.__end_), std::forward<_Args>(__args)...);
- __v.__end_++;
- __swap_out_circular_buffer(__v);
- return this->__end_;
-}
-
template <class _Tp, class _Allocator>
template <class... _Args>
_LIBCPP_CONSTEXPR_SINCE_CXX20 inline
@@ -1551,16 +1543,14 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 inline
void
#endif
vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) {
- pointer __end = this->__end_;
- if (__end < this->__end_cap()) {
- __construct_one_at_end(std::forward<_Args>(__args)...);
- ++__end;
- } else {
- __end = __emplace_back_slow_path(std::forward<_Args>(__args)...);
- }
- this->__end_ = __end;
+ if (size() == capacity()) [[__unlikely__]] {
+ __reserve_for_real(__recommend(size() + 1));
+ }
+
+ // Now we're in the fast path, and we can assume that we have enough capacity.
+ __construct_one_at_end(std::forward<_Args>(__args)...);
#if _LIBCPP_STD_VER >= 17
- return *(__end - 1);
+ return *(this->__end_ - 1);
#endif
}
```
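Here is the standalone illustration mentioned above: a minimal, self-contained example (plain C++, not libc++ code) of the self-aliasing call that rules out the reserve-first ordering.

```cpp
#include <cassert>
#include <vector>

int main() {
  std::vector<int> v = {1, 2, 3};
  while (v.size() < v.capacity())
    v.push_back(0); // fill to capacity so the next emplace_back must reallocate

  // Self-aliasing call: the reference bound to v.front() must remain valid
  // until the new element has been constructed in the *new* buffer, so the
  // construction has to happen before the existing elements are moved out.
  v.emplace_back(v.front());
  assert(v.back() == 1);
}
```

With the reserve-first rewrite, the reallocation would move the elements before the new one is constructed, leaving the reference bound to `v.front()` dangling at construction time.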
The other approach we investigated was to rewrite the code like this:
```diff
diff --git a/libcxx/include/vector b/libcxx/include/vector
index bfbf1ea1cfc9..3b62dc029b76 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -1542,6 +1542,32 @@ vector<_Tp, _Allocator>::__emplace_back_slow_path(_Args&&... __args) {
return this->__end_;
}
+// This function is basically doing the following code:
+// if (cond) [[likely]]
+// __if();
+// else
+// __else();
+//
+// However, it does so without preventing the optimizer from inlining
+// the `else` branch in the case where the condition is known at compile-time.
+// This should really be fixed in the optimizer instead, see <LLVM BUG>.
+// Once that bug is fixed, there is no reason to keep this utility around.
+template <class _If, class _Else>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
+void __if_likely_else(bool __cond, _If __if, _Else __else) {
+ if (__builtin_constant_p(__cond)) {
+ if (__cond)
+ __if();
+ else
+ __else();
+ } else {
+ if (__cond) [[__likely__]]
+ __if();
+ else
+ __else();
+ }
+}
+
template <class _Tp, class _Allocator>
template <class... _Args>
_LIBCPP_CONSTEXPR_SINCE_CXX20 inline
@@ -1552,12 +1578,17 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 inline
#endif
vector<_Tp, _Allocator>::emplace_back(_Args&&... __args) {
pointer __end = this->__end_;
- if (__end < this->__end_cap()) {
- __construct_one_at_end(std::forward<_Args>(__args)...);
- ++__end;
- } else {
- __end = __emplace_back_slow_path(std::forward<_Args>(__args)...);
- }
+
+ bool __has_capacity = __end < this->__end_cap();
+ std::__if_likely_else(__has_capacity,
+ [&] {
+ __construct_one_at_end(std::forward<_Args>(__args)...);
+ ++__end;
+ },
+ [&] {
+ __end = __emplace_back_slow_path(std::forward<_Args>(__args)...);
+ });
+
this->__end_ = __end;
#if _LIBCPP_STD_VER >= 17
return *(__end - 1);
```
Basically, this applies `[[likely]]` to the no-reallocation branch, which makes the compiler outline the slow path as we want. However, we also want the compiler to constant-fold the capacity check when it can, so that the slow path is folded away entirely when the vector's shape is known at compile time. Today, the likelihood attribute prevents that folding, which we view as an LLVM bug, so `__if_likely_else` works around it by dropping the attribute (via `__builtin_constant_p`) whenever the condition is a compile-time constant. We think this specific issue can be addressed, and once it is, the weird `__if_likely_else` helper can go away.
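For illustration, here is a standalone, compilable sketch of the same dispatch pattern outside of libc++. It assumes C++20 (for `[[likely]]`) and a Clang/GCC-style compiler (for `__builtin_constant_p`); the names are made up for the example, not libc++'s.

```cpp
#include <cstdio>

// Same idea as __if_likely_else above: only emit the [[likely]] hint when the
// condition is *not* a compile-time constant, so the optimizer stays free to
// fold away the untaken branch (and the lambda it calls) when it is.
template <class If, class Else>
void if_likely_else(bool cond, If on_true, Else on_false) {
  if (__builtin_constant_p(cond)) {
    // Condition known at compile time: no hint, let the optimizer fold.
    if (cond)
      on_true();
    else
      on_false();
  } else {
    // Runtime condition: bias code layout toward the likely branch.
    if (cond) [[likely]]
      on_true();
    else
      on_false();
  }
}

int main() {
  int result = 0;

  // Constant condition: the whole call folds to ++result.
  if_likely_else(true, [&] { ++result; }, [&] { result -= 100; });

  // Runtime condition: [[likely]] only affects code layout; both branches stay.
  int c = std::getchar();
  if_likely_else(c != EOF, [&] { ++result; }, [&] { --result; });

  std::printf("%d\n", result);
}
```

The `__builtin_constant_p` check is exactly the workaround described above: the hint only helps for genuinely dynamic conditions, and dropping it for constants restores the folding that the attribute currently inhibits.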
https://github.com/llvm/llvm-project/pull/94379