EricWF wrote:

After implementing a version myself, I think there's a bunch of unneeded 
complexity in this type.
I compared the two implementations on a personal Compiler Explorer instance 
that is set up to compare libc++ patches (and which I'm working to make 
available to other developers).

The use of `__libcpp_allocate` and `__libcpp_deallocate` allows the compiler to 
optimize away the allocations in my implementation, but not in this one. The 
result is the difference between each test optimizing to basically zero code 
and each test compiling to a bunch of code.

This is the codegen for one of the `call` tests with this implementation:

```asm
test(): # @test()
  push rax
  lea rax, [rip + std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<void (*)() noexcept>]
  cmp rax, 7
  jbe .LBB0_8
  lea rax, [rip + std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>]
  cmp rax, 7
  jbe .LBB0_8
  mov byte ptr [rip + called], 0
  mov edi, 40
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>]
  cmp rcx, 7
  jbe .LBB0_8
  mov byte ptr [rip + called], 1
  mov esi, 40
  mov edx, 1
  mov rdi, rax
  call operator delete[](void*, unsigned long, std::align_val_t)@PLT
  mov byte ptr [rip + called], 0
  mov edi, 1
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<NonTrivial>]
  cmp rcx, 7
  jbe .LBB0_8
  mov byte ptr [rip + called], 1
  mov esi, 1
  mov edx, 1
  mov rdi, rax
  call operator delete[](void*, unsigned long, std::align_val_t)@PLT
  lea rax, [rip + std::__2::move_only_function<void (S) noexcept 
&&>::__trivial_vtable_<void (S::*)() noexcept &&>]
  cmp rax, 7
  jbe .LBB0_11
  lea rax, [rip + std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<CallTypeCheckerNoexcept>]
  cmp rax, 7
  jbe .LBB0_8
  pop rax
  ret
.LBB0_8:
  ud2
.LBB0_11:
  lea rdi, [rip + .L.str.2]
  lea rsi, [rip + .L.str.1]
  lea rcx, [rip + .L__PRETTY_FUNCTION__.test()]
  mov edx, 56
  call __assert_fail@PLT
  mov rdi, rax
  call __clang_call_terminate
  mov rdi, rax
  call __clang_call_terminate
test_return(): # @test_return()
  push rax
  lea rax, [rip + std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<int (*)(int) noexcept>]
  cmp rax, 7
  jbe .LBB1_9
  lea rax, [rip + std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>]
  cmp rax, 7
  jbe .LBB1_9
  mov byte ptr [rip + called], 0
  mov edi, 40
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>]
  cmp rcx, 7
  jbe .LBB1_9
  cmp byte ptr [rip + called], 1
  je .LBB1_11
  mov esi, 40
  mov edx, 1
  mov rdi, rax
  call operator delete[](void*, unsigned long, std::align_val_t)@PLT
  mov byte ptr [rip + called], 0
  mov edi, 1
  mov esi, 1
  call operator new[](unsigned long, std::align_val_t)@PLT
  lea rcx, [rip + std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<NonTrivial>]
  cmp rcx, 7
  jbe .LBB1_9
  cmp byte ptr [rip + called], 1
  je .LBB1_13
  mov esi, 1
  mov edx, 1
  mov rdi, rax
  pop rax
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
.LBB1_9:
  ud2
.LBB1_11:
  lea rdi, [rip + .L.str.6]
  lea rsi, [rip + .L.str.1]
  lea rcx, [rip + .L__PRETTY_FUNCTION__.test_return()]
  mov edx, 89
  call __assert_fail@PLT
.LBB1_13:
  lea rdi, [rip + .L.str.6]
  lea rsi, [rip + .L.str.1]
  lea rcx, [rip + .L__PRETTY_FUNCTION__.test_return()]
  mov edx, 95
  call __assert_fail@PLT
  mov rdi, rax
  call __clang_call_terminate
  mov rdi, rax
  call __clang_call_terminate
main: # @main
  push rax
  call test_return()
  call test()
  xor eax, eax
  pop rcx
  ret
__clang_call_terminate: # @__clang_call_terminate
  push rax
  call __cxa_begin_catch@PLT
  call std::terminate()@PLT
std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() 
noexcept>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void 
(*)() noexcept>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  jmp qword ptr [rdi] # TAILCALL
std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov byte ptr [rip + called], 1
  ret
std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov byte ptr [rip + called], 1
  ret
std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 40
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov byte ptr [rip + called], 1
  ret
std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 1
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void 
(S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, 
S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S): # 
@std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void 
(S::*)() noexcept &&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, 
S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S)
  push rax
  mov rax, rdi
  lea rdi, [rsp + 7]
  add rdi, qword ptr [rax + 8]
  call qword ptr [rax]
  pop rax
  ret
std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rax, qword ptr [rdi]
  mov dword ptr [rax], 2
  ret
std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int 
(*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, 
int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # 
@std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int 
(*)(int) noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, 
int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov rax, rdi
  mov edi, esi
  jmp qword ptr [rax] # TAILCALL
std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # 
@std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov eax, esi
  ret
std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # 
@std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov eax, esi
  ret
std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 40
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int): # 
@std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  mov eax, esi
  ret
std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&): # 
@std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  mov rdi, qword ptr [rdi]
  mov esi, 1
  mov edx, 1
  jmp operator delete[](void*, unsigned long, std::align_val_t)@PLT # TAILCALL
called:
  .byte 0 # 0x0

.L.str.1:
  .asciz 
"/tmp/compiler-explorer-compiler202457-3044864-1gsqf1r.1a4w/example.cpp"

.L__PRETTY_FUNCTION__.test():
  .asciz "void test()"

.L.str.2:
  .asciz "f"

.L__PRETTY_FUNCTION__.test_return():
  .asciz "void test_return()"

.L.str.6:
  .asciz "!called"

std::__2::move_only_function<void () noexcept &&>::__trivial_vtable_<void (*)() 
noexcept>:
  .quad std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<void (*)() 
noexcept>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>:
  .quad std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>:
  .quad std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  .quad std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<NonTrivial>:
  .quad std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)
  .quad std::__2::move_only_function<void () noexcept 
&&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<void (S) noexcept &&>::__trivial_vtable_<void 
(S::*)() noexcept &&>:
  .quad std::__2::move_only_function<void (S) noexcept 
&&>::__trivial_vtable_<void (S::*)() noexcept 
&&>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, 
S)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, S)

std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<CallTypeCheckerNoexcept>:
  .quad std::__2::move_only_function<void () noexcept 
&&>::__trivial_vtable_<CallTypeCheckerNoexcept>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<int (int) noexcept &&>::__trivial_vtable_<int 
(*)(int) noexcept>:
  .quad std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<int (*)(int) 
noexcept>::{lambda(std::__2::__small_buffer<24ul, 8ul>&, 
int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)

std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>:
  .quad std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructible>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)

std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>:
  .quad std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  .quad std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<TriviallyDestructibleTooLarge>::{lambda(std::__2::__small_buffer<24ul,
 8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<NonTrivial>:
  .quad std::__2::move_only_function<int (int) noexcept 
&&>::__trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&, int)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&, int)
  .quad std::__2::move_only_function<int (int) noexcept 
&&>::__non_trivial_vtable_<NonTrivial>::{lambda(std::__2::__small_buffer<24ul, 
8ul>&)#1}::__invoke(std::__2::__small_buffer<24ul, 8ul>&)

DW.ref.__gxx_personality_v0:
  .quad __gxx_personality_v0

  ```

versus the codegen with my implementation:

```asm
test(): # @test()
  mov byte ptr [rip + called], 1
  ret
test_return(): # @test_return()
  mov byte ptr [rip + called], 0
  ret
main: # @main
  mov byte ptr [rip + called], 1
  xor eax, eax
  ret
called:
  .byte 0 # 0x0
```

I also think the `__pointer_int_pair` idiom is unneeded and should be removed. 
It causes the optimizer to lose track of the `__call_` function pointer, which 
prevents the elision of the call. That's a big issue, and one that I think 
should block this change.

Further, the use of `__small_buffer` prevents the elision of the allocations, 
which hurts the optimizer even more.
I think this too should be simplified before proceeding. You can replace it 
with 
```c++ 
union { 
   void* __ptr_; 
   std::byte __buff_[sizeof(void*) * 3];
 }
```

I'm very happy to proceed using your mechanism for stamping out the 
specializations using repeated includes. In fact, I prefer it over my 
overcomplicated metaprogramming. 

However, I think there are a lot of simplifications that can be taken from my 
implementation that would make things more readable and performant.

To summarize, the requested changes are:

* remove `__pointer_int_pair`. 
* remove `__small_buffer`. 
* Simplify the vtable. There's no need for inheritance. Use `__delete_ == 
nullptr` to signal trivially destructible types.


https://github.com/llvm/llvm-project/pull/94670
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to