[cfe-dev] clang 3.5 loop optimizer seems to jump in unintentional for simple loops
Eric Christopher
echristo at gmail.com
Tue Jul 22 11:12:40 PDT 2014
You'll want to carry on this discussion on llvmdev at cs.uiuc.edu rather
than cfe-dev. cfe-dev is largely concerned with the front end; llvmdev
covers the back end and the optimizers.
Thanks!
-eric
On Mon, Jul 21, 2014 at 10:04 PM, Dennis Luehring <dl.soluz at gmx.net> wrote:
> The clang 3.5 loop optimizer seems to kick in unintentionally for simple loops.
>
> Here is a very simple example:
>
> ----
> const int SIZE = 3;
>
> int the_func(int* p_array)
> {
> int dummy = 0;
> #if defined(ITER)
> for(int* p = &p_array[0]; p < &p_array[SIZE]; ++p) dummy += *p;
> #else
> for(int i = 0; i < SIZE; ++i) dummy += p_array[i];
> #endif
> return dummy;
> }
>
> int main(int argc, char** argv)
> {
> int* array = new int[SIZE];
> for(int i = 0; i < SIZE; ++i){ array[i] = *argv[i]; }
> int dummy = the_func(array);
> delete[] array;
> return dummy;
> }
> ----
>
> compiled with gcc 4.9.1 and clang 3.5
>
> clang -DITER -O2
> clang -DITER -O3
>
> gives:
>
> the_func:
> leaq 12(%rdi), %rcx
> leaq 4(%rdi), %rax
> cmpq %rax, %rcx
> cmovaq %rcx, %rax
> movq %rdi, %rsi
> notq %rsi
> addq %rax, %rsi
> shrq $2, %rsi
> incq %rsi
> xorl %edx, %edx
> movabsq $9223372036854775800, %rax # imm = 0x7FFFFFFFFFFFFFF8
> andq %rsi, %rax
> pxor %xmm0, %xmm0
> je .LBB0_1
> # BB#2: # %vector.body.preheader
> leaq (%rdi,%rax,4), %r8
> addq $16, %rdi
> movq %rsi, %rdx
> andq $-8, %rdx
> pxor %xmm0, %xmm0
> pxor %xmm1, %xmm1
> .align 16, 0x90
> .LBB0_3: # %vector.body
> # =>This Inner Loop Header: Depth=1
> movdqa %xmm1, %xmm2
> movdqa %xmm0, %xmm3
> movdqu -16(%rdi), %xmm0
> movdqu (%rdi), %xmm1
> paddd %xmm3, %xmm0
> paddd %xmm2, %xmm1
> addq $32, %rdi
> addq $-8, %rdx
> jne .LBB0_3
> # BB#4:
> movq %r8, %rdi
> movq %rax, %rdx
> jmp .LBB0_5
> .LBB0_1:
> pxor %xmm1, %xmm1
> .LBB0_5: # %middle.block
> paddd %xmm1, %xmm0
> movdqa %xmm0, %xmm1
> movhlps %xmm1, %xmm1 # xmm1 = xmm1[1,1]
> paddd %xmm0, %xmm1
> pshufd $1, %xmm1, %xmm0 # xmm0 = xmm1[1,0,0,0]
> paddd %xmm1, %xmm0
> movd %xmm0, %eax
> cmpq %rdx, %rsi
> je .LBB0_7
> .align 16, 0x90
> .LBB0_6: # %scalar.ph
> # =>This Inner Loop Header: Depth=1
> addl (%rdi), %eax
> addq $4, %rdi
> cmpq %rcx, %rdi
> jb .LBB0_6
> .LBB0_7: # %._crit_edge
> retq
>
> isn't that a little bit too long?
>
> other examples:
>
> clang -O2
> clang -O3
> gcc -O3
> gcc -DITER -O3
>
> gives:
>
> the_func:
> movl 4(%rdi), %eax
> addl (%rdi), %eax
> addl 8(%rdi), %eax
> ret(q)
>
> looks good
>
> gcc -DITER -O2
>
> gives:
>
> the_func:
> leaq 12(%rdi), %rdx
> xorl %eax, %eax
> .L2:
> addl (%rdi), %eax
> addq $4, %rdi
> cmpq %rdx, %rdi
> jne .L2
> rep ret
>
> looks good
>
> _______________________________________________
> cfe-dev mailing list
> cfe-dev at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev
More information about the cfe-dev
mailing list