[cfe-dev] missing optimization opportunity for const std::vector compared to std::array
Dennis Luehring
dl.soluz at gmx.net
Tue Jun 3 21:23:54 PDT 2014
Am 04.06.2014 00:16, schrieb Richard Smith:
>> >The standard (as of only a few months ago) allows operator new/operator
>> >delete calls such as the above to be removed, but *only* if they came from
>> >new-expressions and delete-expressions. Under the covers, std::vector uses
>> >direct calls to ::operator new and ::operator delete, so these provisions
>> >do not apply.
>> >
>> >So... we should add some way for std::allocator to say
>> >"allocate/deallocate memory like a new-expression", and use it in libc++. I
>> >think we should add
>> >
>> > void *__builtin_operator_new(size_t)
>> > void __builtin_operator_delete(void*)
>> >
>> >for this.
>> >
> Clang now supports these, and I have a patch out for review to make libc++
> use them. With the patch applied, clang+libc++ optimizes your code down to
> just 'return 160' for both the std::array and std::vector cases.
>
Unbelievably fast integration :) - thanks a lot for taking the time.
How well does your patch play with gcc?
http://gcc.godbolt.org, clang version 3.4.1 -O3 -std=c++11
#include <string>
int main()
{
return std::string("hello").size();
}
results in:
main: # @main
pushq %rbx
subq $32, %rsp
leaq 16(%rsp), %rdi
leaq 8(%rsp), %rdx
movl $.L.str, %esi
callq std::basic_string<char, std::char_traits<char>,
std::allocator<char> >::basic_string(char const*, std::allocator<char>
const&)
movq 16(%rsp), %rax
leaq -24(%rax), %rdi
movl std::basic_string<char, std::char_traits<char>,
std::allocator<char> >::_Rep::_S_empty_rep_storage, %ecx
cmpq %rcx, %rdi
movl -24(%rax), %ebx
jne .LBB0_1
.LBB0_6: # %_ZNSsD1Ev.exit
movl %ebx, %eax
addq $32, %rsp
popq %rbx
ret
.LBB0_1:
addq $-8, %rax
movl $__pthread_key_create, %ecx
testq %rcx, %rcx
je .LBB0_3
movl $-1, %ecx
lock
xaddl %ecx, (%rax)
movl %ecx, 28(%rsp)
movl 28(%rsp), %ecx
jmp .LBB0_4
.LBB0_3:
movl (%rax), %ecx
leal -1(%rcx), %edx
movl %edx, (%rax)
.LBB0_4: # %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i
testl %ecx, %ecx
jg .LBB0_6
leaq 24(%rsp), %rsi
callq std::basic_string<char, std::char_traits<char>,
std::allocator<char> >::_Rep::_M_destroy(std::allocator<char> const&)
jmp .LBB0_6
.L.str:
.asciz "hello"
and
#include <vector>
#include <numeric>
typedef std::vector<int> container_t;
int main()
{
const container_t a{1,2};
const container_t b{4,5};
const container_t ints
{
std::accumulate(a.begin(),a.end(),1),
std::accumulate(b.begin(),b.end(),2),
};
return std::accumulate(ints.begin(),ints.end(),100);
}
results in:
main: # @main
pushq %rbp
pushq %r15
pushq %r14
pushq %rbx
pushq %rax
movl $8, %edi
callq operator new(unsigned long)
movq %rax, %r14
movabsq $8589934593, %rax # imm = 0x200000001
movq %rax, (%r14)
movl $8, %edi
callq operator new(unsigned long)
movq %rax, %rbx
movabsq $21474836484, %rax # imm = 0x500000004
movq %rax, (%rbx)
movl (%r14), %r15d
movl 4(%r14), %ebp
movl $8, %edi
callq operator new(unsigned long)
leal 1(%r15,%rbp), %ebp
testq %rax, %rax
movl %ebp, (%rax)
movl $11, 4(%rax)
je .LBB0_5
movq %rax, %rdi
callq operator delete(void*)
.LBB0_5: # %_ZNSt6vectorIiSaIiEED2Ev.exit25
testq %rbx, %rbx
je .LBB0_7
movq %rbx, %rdi
callq operator delete(void*)
.LBB0_7: # %_ZNSt6vectorIiSaIiEED2Ev.exit23
addl $111, %ebp
testq %r14, %r14
je .LBB0_9
movq %r14, %rdi
callq operator delete(void*)
.LBB0_9: # %_ZNSt6vectorIiSaIiEED2Ev.exit21
movl %ebp, %eax
addq $8, %rsp
popq %rbx
popq %r14
popq %r15
popq %rbp
ret
movq %rax, %rbp
movq %rbp, %rdi
callq _Unwind_Resume
movq %rax, %rbp
jmp .LBB0_14
movq %rax, %rbp
testq %rbx, %rbx
je .LBB0_14
movq %rbx, %rdi
callq operator delete(void*)
.LBB0_14: # %_ZNSt6vectorIiSaIiEED2Ev.exit15
testq %r14, %r14
je .LBB0_16
movq %r14, %rdi
callq operator delete(void*)
.LBB0_16: # %_ZNSt6vectorIiSaIiEED2Ev.exit
movq %rbp, %rdi
callq _Unwind_Resume
GCC_except_table0:
.byte 255 # @LPStart Encoding = omit
.byte 3 # @TType Encoding = udata4
.asciz "\266\200\200" # @TType base offset
.byte 3 # Call site Encoding = udata4
.byte 52 # Call site table length
.long .Lset0
.long .Lset1
.long .Lset2
.byte 0 # On action: cleanup
.long .Lset3
.long .Lset4
.long .Lset5
.byte 0 # On action: cleanup
.long .Lset6
.long .Lset7
.long .Lset8
.byte 0 # On action: cleanup
.long .Lset9
.long .Lset10
.long 0 # has no landing pad
.byte 0 # On action: cleanup
More information about the cfe-dev
mailing list