[cfe-dev] missing optimization opportunity for const std::vector compared to std::array

Dennis Luehring dl.soluz at gmx.net
Fri Sep 20 04:33:30 PDT 2013


I've written this small test program to test the gcc 4.8.1 optimizer and 
found an optimization opportunity.

for details see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58483

Then I compared the gcc results to clang 3.3 and found that the 
optimization of llvm "seems" to be far away from the gcc results in 
this case.

--- test.cpp ---
#include <vector>
#include <numeric>
#include <array>

// Returns p_init plus the sum of all elements of p_ints, computed with
// std::accumulate. The container parameter is taken by value.
// The commented-out signature below is the std::vector variant of the same
// function; swap the two signature lines to switch container types.
static int calc(const std::array<int,3> p_ints, const int& p_init)
//static int calc(const std::vector<int> p_ints, const int& p_init)
{
   return std::accumulate(p_ints.begin(), p_ints.end(), p_init);
}

// Calls calc with the compile-time constants {10,20,30} and an initial value
// of 100, then returns the result as the process exit status
// (10 + 20 + 30 + 100 = 160).
int main()
{
   // The braced list initializes calc's by-value container parameter.
   const int result = calc({10,20,30},100);
   return result;
}

gcc-optimizer-result using std::array

main:
     mov    eax, 160
     ret

gcc-optimizer result using std::vector

main:
     push    rbx
     mov    edi, 12
     call    operator new(unsigned long)
     mov    rdx, QWORD PTR ._81[rip]
     mov    rdi, rax
     mov    QWORD PTR [rax], rdx
     mov    eax, DWORD PTR ._81[rip+8]
     mov    rsi, rdx
     shr    rsi, 32
     lea    ebx, [rsi+100+rdx]
     add    ebx, eax
     test    rdi, rdi
     mov    DWORD PTR [rdi+8], eax
     je    .L2
     call    operator delete(void*)
.L2:
     mov    eax, ebx
     pop    rbx
     ret
._81:
     .long    10
     .long    20
     .long    30

the clang 3.3 results for -O3 -march=native -std=c++11

using std::array

main:                                   # @main
     movabsq    $85899345930, %rax      # imm = 0x140000000A
     movq    %rax, -16(%rsp)
     movl    $100, %esi
     movl    $30, -8(%rsp)
     xorl    %edx, %edx
     leaq    -16(%rsp), %rcx
     movb    $1, %al
     testb    %al, %al
     jne    .LBB0_1
     movd    %esi, %xmm1
     pxor    %xmm0, %xmm0
     xorl    %eax, %eax
.LBB0_3:                                # %vector.body.i.i
     movdqu    (%rsp,%rax,4), %xmm2
     paddd    %xmm2, %xmm0
     movdqu    -16(%rsp,%rax,4), %xmm2
     paddd    %xmm2, %xmm1
     addq    $8, %rax
     cmpq    %rax, %rdx
     jne    .LBB0_3
     jmp    .LBB0_4
.LBB0_1:
     pxor    %xmm0, %xmm0
     movd    %esi, %xmm1
.LBB0_4:                                # %middle.block.i.i
     movl    $3, %esi
     paddd    %xmm1, %xmm0
     movdqa    %xmm0, %xmm1
     movhlps    %xmm1, %xmm1            # xmm1 = xmm1[1,1]
     paddd    %xmm0, %xmm1
     phaddd    %xmm1, %xmm1
     movd    %xmm1, %eax
     cmpq    %rdx, %rsi
     je    .LBB0_7
     addq    $-12, %rcx
     leaq    -16(%rsp), %rdx
.LBB0_6:                                # %scalar.ph.i.i
     addl    12(%rcx), %eax
     addq    $4, %rcx
     cmpq    %rcx, %rdx
     jne    .LBB0_6
.LBB0_7:                                # %_ZL4calcSt5arrayIiLm3EERKi.exit
     ret

using std::vector

main:                                   # @main
     pushq    %rbx
     movl    $12, %edi
     callq    operator new(unsigned long)
     movabsq    $85899345930, %rcx      # imm = 0x140000000A
     movq    %rcx, (%rax)
     xorl    %ecx, %ecx
     movl    $3, %edx
     movl    $100, %esi
     movl    $30, 8(%rax)
     movb    $1, %bl
     movd    %esi, %xmm1
     pxor    %xmm0, %xmm0
     testb    %bl, %bl
     jne    .LBB0_3
     xorl    %esi, %esi
.LBB0_2:                                # %vector.body.i.i
     movdqu    16(%rax,%rsi,4), %xmm2
     paddd    %xmm2, %xmm0
     movdqu    (%rax,%rsi,4), %xmm2
     paddd    %xmm2, %xmm1
     addq    $8, %rsi
     cmpq    %rsi, %rcx
     jne    .LBB0_2
.LBB0_3:                                # %middle.block.i.i
     paddd    %xmm1, %xmm0
     movdqa    %xmm0, %xmm1
     movhlps    %xmm1, %xmm1            # xmm1 = xmm1[1,1]
     paddd    %xmm0, %xmm1
     phaddd    %xmm1, %xmm1
     movd    %xmm1, %ebx
     cmpq    %rcx, %rdx
     je    .LBB0_6
     movq    %rax, %rcx
     addq    $-12, %rcx
.LBB0_5:                                # %scalar.ph.i.i
     addl    12(%rcx), %ebx
     addq    $4, %rcx
     cmpq    %rcx, %rax
     jne    .LBB0_5
.LBB0_6:                                # %_ZL4calcSt6vectorIiSaIiEERKi.exit
     testq    %rax, %rax
     je    .LBB0_8
     movq    %rax, %rdi
     callq    operator delete(void*)
.LBB0_8:                                # %_ZNSt6vectorIiSaIiEED1Ev.exit
     movl    %ebx, %eax
     popq    %rbx
     ret

Is the llvm optimizer not able to optimize this better, is that a better 
result, or am I doing something wrong here?

thx







More information about the cfe-dev mailing list