[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Jun 7 05:43:02 PDT 2024


EricWF wrote:

After implementing a version myself, I think there's a bunch of unneeded complexity in this type.
I compared the two implementations on a personal Compiler Explorer instance that is set up to compare libc++ patches (something I'm working to make available to other developers).

The use of `__libcpp_allocate` and `__libcpp_deallocate` allows the compiler to optimize away the allocations in mine, but not in this implementation. The result is the difference between each test optimizing to basically zero code vs a bunch of code.

This is the codegen for one of the `call` tests with this implementation:

```asm
test(): # @test()
  push rax
  lea rax, [rip + std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>]
  cmp rax, 7
  jbe .LBB0_8
  lea rax, [rip + std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>]
  cmp rax, 7
  jbe .LBB0_8
  mov byte ptr [rip + called], 0
  mov edi, 40
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>]
  cmp rcx, 7
  jbe .LBB0_8
  mov byte ptr [rip + called], 1
  mov esi, 40
  mov edx, 1
  mov rdi, rax
  call operator delete[](void*, unsigned long, std::align_val_t)@PLT
  mov byte ptr [rip + called], 0
  mov edi, 1
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>]
  cmp rcx, 7
  jbe .LBB0_8
  mov byte ptr [rip + called], 1
  mov esi, 1
  mov edx, 1
  mov rdi, rax
  call operator delete[](void*, unsigned long, std::align_val_t)@PLT
  lea rax, [rip + std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>]
  cmp rax, 7
  jbe .LBB0_11
  lea rax, [rip + std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>]
  cmp rax, 7
  jbe .LBB0_8
  pop rax
  ret
.LBB0_8:
  ud2
.LBB0_11:
  lea rdi, [rip + .L.str.2]
  lea rsi, [rip + .L.str.1]
  lea rcx, [rip + .L__PRETTY_FUNCTION__.test()]
  mov edx, 56
  call __assert_fail@PLT
  mov rdi, rax
  call __clang_call_terminate
  mov rdi, rax
  call __clang_call_terminate
test_return(): # @test_return()
  push rax
  lea rax, [rip + std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>]
  cmp rax, 7
  jbe .LBB1_9
  lea rax, [rip + std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>]
  cmp rax, 7
  jbe .LBB1_9
  mov byte ptr [rip + called], 0
  mov edi, 40
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>]
  cmp rcx, 7
  jbe .LBB1_9
  cmp byte ptr [rip + called], 1
  je .LBB1_11
  mov esi, 40
  mov edx, 1
  mov rdi, rax
  call operator delete[](void*, unsigned long, std::align_val_t)@PLT
  mov byte ptr [rip + called], 0
  mov edi, 1
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>]
  cmp rcx, 7
  jbe .LBB1_9
  cmp byte ptr [rip + called], 1
  je .LBB1_13
  mov esi, 1
  mov edx, 1
  mov rdi, rax
  pop rax
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
.LBB1_9:
  ud2
.LBB1_11:
  lea rdi, [rip + .L.str.6]
  lea rsi, [rip + .L.str.1]
  lea rcx, [rip + .L__PRETTY_FUNCTION__.test_return()]
  mov edx, 89
  call __assert_fail@PLT
.LBB1_13:
  lea rdi, [rip + .L.str.6]
  lea rsi, [rip + .L.str.1]
  lea rcx, [rip + .L__PRETTY_FUNCTION__.test_return()]
  mov edx, 95
  call __assert_fail@PLT
  mov rdi, rax
  call __clang_call_terminate
  mov rdi, rax
  call __clang_call_terminate
main: # @main
  push rax
  call test_return()
  call test()
  xor eax, eax
  pop rcx
  ret
__clang_call_terminate: # @__clang_call_terminate
  push rax
  call __cxa_begin_catch@PLT
  call std::terminate()@PLT
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  jmp qword ptr [rdi] # TAILCALL
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov byte ptr [rip + called], 1
  ret
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov byte ptr [rip + called], 1
  ret
std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 40
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov byte ptr [rip + called], 1
  ret
std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 1
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S): # @std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S)
  push rax
  mov rax, rdi
  lea rdi, [rsp + 7]
  add rdi, qword ptr [rax + 8]
  call qword ptr [rax]
  pop rax
  ret
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rax, qword ptr [rdi]
  mov dword ptr [rax], 2
  ret
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov rax, rdi
  mov edi, esi
  jmp qword ptr [rax] # TAILCALL
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov eax, esi
  ret
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov eax, esi
  ret
std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 40
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # @std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov eax, esi
  ret
std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # @std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 1
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
called:
  .byte 0 # 0x0

.L.str.1:
  .asciz "/tmp/compiler-explorer-compiler202457-3044864-1gsqf1r.1a4w/example.cpp"

.L__PRETTY_FUNCTION__.test():
  .asciz "void test()"

.L.str.2:
  .asciz "f"

.L__PRETTY_FUNCTION__.test_return():
  .asciz "void test_return()"

.L.str.6:
  .asciz "!called"

std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>:
  .quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>:
  .quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>:
  .quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  .quad std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>:
  .quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  .quad std::__2::move_only_function<void () noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>:
  .quad std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void (S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S)

std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>:
  .quad std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>:
  .quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int (*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)

std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>:
  .quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)

std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>:
  .quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  .quad std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>:
  .quad std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  .quad std::__2::move_only_function<int (int) noexcept &&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

DW.ref.__gxx_personality_v0:
  .quad __gxx_personality_v0

  ```

vs 

```asm
test(): # @test()
  mov byte ptr [rip + called], 1
  ret
test_return(): # @test_return()
  mov byte ptr [rip + called], 0
  ret
main: # @main
  mov byte ptr [rip + called], 1
  xor eax, eax
  ret
called:
  .byte 0 # 0x0
```

I also think the `__pointer_int_pair` idiom is unneeded and should be removed. It causes the optimizer to lose track of the `__call_` function pointer, which prevents the elision of the call. That's a big issue, and one that I think should block.

Further, the use of `__small_buffer` prevents the elision of the allocations, further hurting the optimizer.
I think this too should be simplified before proceeding. You can replace it with:
```c++ 
union { 
   void* __ptr_; 
   std::byte __buff_[sizeof(void*) * 3];
 }
```

I'm very happy to proceed using your mechanism for stamping out the specializations using repeated includes; in fact, I prefer it over my overcomplicated metaprogramming.

However, I think there are a lot of simplifications that can be taken from my implementation that make things more readable and performant.

To summarize, the requested changes are:

* remove `__pointer_int_pair`. 
* remove `__small_buffer`. 
* Simplify the vtable. There's no need for inheritance. Use `__delete_ == nullptr` to signal trivially destructible types.


https://github.com/llvm/llvm-project/pull/94670


More information about the llvm-branch-commits mailing list