[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 7 05:43:02 PDT 2024
EricWF wrote:
After implementing a version myself, I think there's a bunch of unneeded complexity in this type.
I compared the two implementations on a personal Compiler Explorer instance that is set up to compare libc++ patches (I'm working on making it available to other developers).
The use of `__libcpp_allocate` and `__libcpp_deallocate` allows the compiler to optimize away the allocations in mine, but not in this implementation. The result is the difference between each test optimizing to basically zero code vs a bunch of code.
This is the codegen for one of the `call` tests with the implementation in this PR:
```asm
test(): # @test()
push rax
lea rax, [rip + std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>]
cmp rax, 7
jbe .LBB0_8
lea rax, [rip + std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>]
cmp rax, 7
jbe .LBB0_8
mov byte ptr [rip + called], 0
mov edi, 40
mov esi, 1
call operator new[](unsigned long, std::align_val_t)@PLT
lea rcx, [rip + std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>]
cmp rcx, 7
jbe .LBB0_8
mov byte ptr [rip + called], 1
mov esi, 40
mov edx, 1
mov rdi, rax
call operator delete[](void*, unsigned long, std::align_val_t)@PLT
mov byte ptr [rip + called], 0
mov edi, 1
mov esi, 1
call operator new[](unsigned long, std::align_val_t)@PLT
lea rcx, [rip + std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>]
cmp rcx, 7
jbe .LBB0_8
mov byte ptr [rip + called], 1
mov esi, 1
mov edx, 1
mov rdi, rax
call operator delete[](void*, unsigned long, std::align_val_t)@PLT
lea rax, [rip + std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>]
cmp rax, 7
jbe .LBB0_11
lea rax, [rip + std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>]
cmp rax, 7
jbe .LBB0_8
pop rax
ret
.LBB0_8:
ud2
.LBB0_11:
lea rdi, [rip + .L.str.2]
lea rsi, [rip + .L.str.1]
lea rcx, [rip + .L__PRETTY_FUNCTION__.test()]
mov edx, 56
call __assert_fail@PLT
mov rdi, rax
call __clang_call_terminate
mov rdi, rax
call __clang_call_terminate
test_return(): # @test_return()
push rax
lea rax, [rip + std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>]
cmp rax, 7
jbe .LBB1_9
lea rax, [rip + std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>]
cmp rax, 7
jbe .LBB1_9
mov byte ptr [rip + called], 0
mov edi, 40
mov esi, 1
call operator new[](unsigned long, std::align_val_t)@PLT
lea rcx, [rip + std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>]
cmp rcx, 7
jbe .LBB1_9
cmp byte ptr [rip + called], 1
je .LBB1_11
mov esi, 40
mov edx, 1
mov rdi, rax
call operator delete[](void*, unsigned long, std::align_val_t)@PLT
mov byte ptr [rip + called], 0
mov edi, 1
mov esi, 1
call operator new[](unsigned long, std::align_val_t)@PLT
lea rcx, [rip + std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>]
cmp rcx, 7
jbe .LBB1_9
cmp byte ptr [rip + called], 1
je .LBB1_13
mov esi, 1
mov edx, 1
mov rdi, rax
pop rax
jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
.LBB1_9:
ud2
.LBB1_11:
lea rdi, [rip + .L.str.6]
lea rsi, [rip + .L.str.1]
lea rcx, [rip + .L__PRETTY_FUNCTION__.test_return()]
mov edx, 89
call __assert_fail@PLT
.LBB1_13:
lea rdi, [rip + .L.str.6]
lea rsi, [rip + .L.str.1]
lea rcx, [rip + .L__PRETTY_FUNCTION__.test_return()]
mov edx, 95
call __assert_fail@PLT
mov rdi, rax
call __clang_call_terminate
mov rdi, rax
call __clang_call_terminate
main: # @main
push rax
call test_return()
call test()
xor eax, eax
pop rcx
ret
__clang_call_terminate: # @__clang_call_terminate
push rax
call __cxa_begin_catch@PLT
call std::terminate()@PLT
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
jmp qword ptr [rdi] # TAILCALL
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov byte ptr [rip + called], 1
ret
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov byte ptr [rip + called], 1
ret
std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov rdi, qword ptr [rdi]
mov esi, 40
mov edx, 1
jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov byte ptr [rip + called], 1
ret
std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov rdi, qword ptr [rdi]
mov esi, 1
mov edx, 1
jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S): # @std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S)
push rax
mov rax, rdi
lea rdi, [rsp + 7]
add rdi, qword ptr [rax + 8]
call qword ptr [rax]
pop rax
ret
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov rax, qword ptr [rdi]
mov dword ptr [rax], 2
ret
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
mov rax, rdi
mov edi, esi
jmp qword ptr [rax] # TAILCALL
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
mov eax, esi
ret
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
mov eax, esi
ret
std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov rdi, qword ptr [rdi]
mov esi, 40
mov edx, 1
jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
mov eax, esi
ret
std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
mov rdi, qword ptr [rdi]
mov esi, 1
mov edx, 1
jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
called:
.byte 0 # 0x0
.L.str.1:
.asciz "/tmp/compiler-explorer-compiler202457-3044864-1gsqf1r.1a4w/example.cpp"
.L__PRETTY_FUNCTION__.test():
.asciz "void test()"
.L.str.2:
.asciz "f"
.L__PRETTY_FUNCTION__.test_return():
.asciz "void test_return()"
.L.str.6:
.asciz "!called"
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>:
.quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>:
.quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>:
.quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
.quad std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>:
.quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
.quad std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>:
.quad std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S)
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>:
.quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>:
.quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>:
.quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>:
.quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
.quad std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>:
.quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
.quad std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
DW.ref.__gxx_personality_v0:
.quad __gxx_personality_v0
```
vs
```asm
test(): # @test()
mov byte ptr [rip + called], 1
ret
test_return(): # @test_return()
mov byte ptr [rip + called], 0
ret
main: # @main
mov byte ptr [rip + called], 1
xor eax, eax
ret
called:
.byte 0 # 0x0
```
I also think the `__pointer_int_pair` idiom is unneeded and should be removed. It causes the optimizer to lose track of the `__call_` function pointer, which prevents the elision of the call. That's a big issue, and one that I think should be blocking.
Further, the use of `__small_buffer` prevents the elision of the allocations, further hurting the optimizer.
I think this too should be simplified before proceeding. You can replace it with
```c++
union {
void* __ptr_;
std::byte __buff_[sizeof(void*) * 3];
}
```
I'm very happy to proceed using your mechanism for stamping out the specializations using repeated includes. In fact, I prefer it over my overcomplicated metaprogramming.
However, I think there are a lot of simplifications that can be taken from my implementation that make things more readable and performant.
To summarize, the requested changes are:
* remove `__pointer_int_pair`.
* remove `__small_buffer`.
* Simplify the vtable. There's no need for inheritance. Use `__delete_ == nullptr` to signal trivially destructible types.
https://github.com/llvm/llvm-project/pull/94670
More information about the llvm-branch-commits
mailing list