[cfe-dev] clang 3.5 loop optimizer seems to jump in unintentional for simple loops

Dennis Luehring dl.soluz at gmx.net
Mon Jul 21 22:04:16 PDT 2014


The clang 3.5 loop optimizer seems to kick in unintentionally for simple loops.

Here is a very simple example:

----
// Number of ints summed by the_func() and filled in by main().
const int SIZE = 3;

// Returns the sum of the SIZE ints starting at p_array.
//
// The two preprocessor branches compute the same sum with different loop
// forms, which is the point of the comparison in this post:
//   ITER defined     - walks a pointer from &p_array[0] up to &p_array[SIZE]
//   ITER not defined - indexes p_array[i] for i in [0, SIZE)
//
// p_array must point to at least SIZE readable ints.
int the_func(int* p_array)
{
   // Running sum of the elements visited so far.
   int dummy = 0;
#if defined(ITER)
   // Pointer-iteration form: the loop bound is an address comparison.
   for(int* p = &p_array[0]; p < &p_array[SIZE]; ++p) dummy += *p;
#else
   // Index form: the loop bound is the compile-time constant SIZE.
   for(int i = 0; i < SIZE; ++i) dummy += p_array[i];
#endif
   return dummy;
}

// Driver for the_func(): fills a heap array of SIZE ints from run-time data
// (the first character of each command-line argument, starting with argv[0])
// and returns the sum computed by the_func() as the process exit status.
// Presumably the run-time input is there so the sum cannot be folded to a
// compile-time constant.
int main(int argc, char** argv)
{
   int* array = new int[SIZE];
   for(int i = 0; i < SIZE; ++i)
   {
      // Only argv[0] .. argv[argc-1] are valid strings (argv[argc] is a null
      // pointer), so use 0 for any missing argument instead of dereferencing
      // a null or out-of-range pointer when fewer than SIZE are supplied.
      array[i] = (i < argc) ? *argv[i] : 0;
   }
   int dummy = the_func(array);
   delete[] array;
   return dummy;
}
----

compiled with gcc 4.9.1 and clang 3.5

clang -DITER -O2
clang -DITER -O3

gives:

# the_func as emitted by clang 3.5 for the -DITER (pointer-iteration) loop.
# Syntax: AT&T / GAS (op src, dst). Assumes the SysV AMD64 ABI:
#   In:  rdi = p_array
#   Out: eax = sum of the ints in [p_array, p_array + 12 bytes)
# Structure: compute a run-time trip count, run an 8-ints-per-iteration SSE
# loop over the largest multiple of 8, reduce the vector accumulators, then
# finish any remaining elements in a scalar loop.
the_func:
     # rcx = p_array + 12 bytes = &p_array[SIZE] (loop end pointer).
     leaq    12(%rdi), %rcx
     # rax = unsigned max(end, p_array + 4 bytes).
     leaq    4(%rdi), %rax
     cmpq    %rax, %rcx
     cmovaq    %rcx, %rax
     # rsi = ((rax - p_array - 1) >> 2) + 1 = scalar trip count.
     movq    %rdi, %rsi
     notq    %rsi
     addq    %rax, %rsi
     shrq    $2, %rsi
     incq    %rsi
     # rdx = number of elements already handled by the vector loop (0 so far).
     xorl    %edx, %edx
     # rax = trip count with its low three bits (and top bit) masked off,
     # i.e. the part of the trip count that is a multiple of 8.
     movabsq    $9223372036854775800, %rax # imm = 0x7FFFFFFFFFFFFFF8
     andq    %rsi, %rax
     # pxor does not modify flags, so the je below still tests the andq result.
     pxor    %xmm0, %xmm0
     # Skip the vector loop when no full group of 8 elements exists. With the
     # fixed 12-byte range the trip count works out to 3 (barring address
     # wraparound), so the masked count is 0 and this branch is taken.
     je    .LBB0_1
# BB#2:                                 # %vector.body.preheader
     # r8 = pointer to the first element left over after the vector loop.
     leaq    (%rdi,%rax,4), %r8
     # Bias rdi by 16 so the loop loads from -16(%rdi) and (%rdi).
     addq    $16, %rdi
     # rdx = remaining vector-loop element count (multiple of 8).
     movq    %rsi, %rdx
     andq    $-8, %rdx
     # xmm0 / xmm1 = two independent 4 x i32 accumulators, zeroed.
     pxor    %xmm0, %xmm0
     pxor    %xmm1, %xmm1
     .align    16, 0x90
.LBB0_3:                                # %vector.body
                                         # =>This Inner Loop Header: Depth=1
     # Each iteration loads 2 x 16 bytes (8 ints, unaligned loads) and adds
     # them lane-wise into the two accumulators.
     movdqa    %xmm1, %xmm2
     movdqa    %xmm0, %xmm3
     movdqu    -16(%rdi), %xmm0
     movdqu    (%rdi), %xmm1
     paddd    %xmm3, %xmm0
     paddd    %xmm2, %xmm1
     # Advance 32 bytes and count down 8 elements; loop until rdx reaches 0.
     addq    $32, %rdi
     addq    $-8, %rdx
     jne    .LBB0_3
# BB#4:
     # rdi = first leftover element, rdx = number of elements vectorized.
     movq    %r8, %rdi
     movq    %rax, %rdx
     jmp    .LBB0_5
.LBB0_1:
     # Vector loop skipped: zero the second accumulator too (xmm0 is already 0).
     pxor    %xmm1, %xmm1
.LBB0_5:                                # %middle.block
     # Horizontal reduction: merge both accumulators, then fold the four
     # 32-bit lanes down into lane 0 and move it to eax.
     paddd    %xmm1, %xmm0
     movdqa    %xmm0, %xmm1
     movhlps    %xmm1, %xmm1            # xmm1 = xmm1[1,1]
     paddd    %xmm0, %xmm1
     pshufd    $1, %xmm1, %xmm0        # xmm0 = xmm1[1,0,0,0]
     paddd    %xmm1, %xmm0
     movd    %xmm0, %eax
     # Done if the vector loop covered the whole trip count.
     cmpq    %rdx, %rsi
     je    .LBB0_7
     .align    16, 0x90
.LBB0_6:                                # %scalar.ph
                                         # =>This Inner Loop Header: Depth=1
     # Scalar remainder: eax += *rdi; rdi += 4 bytes; repeat while rdi is
     # below the end pointer in rcx (unsigned compare).
     addl    (%rdi), %eax
     addq    $4, %rdi
     cmpq    %rcx, %rdi
     jb    .LBB0_6
.LBB0_7:                                # %._crit_edge
     retq

Isn't that a little bit too long for a loop that only ever runs SIZE = 3 iterations?

other examples:

clang -O2
clang -O3
gcc -O3
gcc -DITER -O3

gives:

# the_func fully unrolled: three loads/adds, no loop.
# Assumes SysV AMD64: rdi = p_array in, eax = sum out.
the_func:
     # eax = p_array[1]
     movl    4(%rdi), %eax
     # eax += p_array[0]
     addl    (%rdi), %eax
     # eax += p_array[2]
     addl    8(%rdi), %eax
     # "ret(q)" is the post's shorthand, not a real mnemonic; presumably it
     # stands for `ret` in the gcc output and `retq` in the clang output.
     ret(q)

looks good

gcc -DITER -O2

gives:

# the_func as emitted by gcc for the -DITER loop at -O2: a plain scalar loop.
# Assumes SysV AMD64: rdi = p_array in, eax = sum out.
the_func:
     # rdx = p_array + 12 bytes = &p_array[SIZE] (loop end pointer).
     leaq    12(%rdi), %rdx
     # eax = running sum, starts at 0.
     xorl    %eax, %eax
.L2:
     # eax += *rdi; advance rdi by one int; repeat until rdi == end pointer.
     addl    (%rdi), %eax
     addq    $4, %rdi
     cmpq    %rdx, %rdi
     jne    .L2
     # NOTE(review): `rep ret` returns like a plain `ret`; gcc is commonly said
     # to emit the prefixed form as an AMD branch-predictor workaround - confirm.
     rep ret

looks good




More information about the cfe-dev mailing list