[LLVMdev] the clang 3.5 loop optimizer seems to jump in unintentional for simple loops

Tue Jul 22 21:00:44 PDT 2014

Hi Dennis,

Can you please file a bug for this at http://llvm.org/bugs/ -- we should not be vectorizing this loop of length 3.

 -Hal

----- Original Message -----
> From: "Dennis Luehring" <dl.soluz at gmx.net>
> To: llvmdev at cs.uiuc.edu
> Sent: Tuesday, July 22, 2014 10:51:54 PM
> Subject: [LLVMdev] the clang 3.5 loop optimizer seems to jump in unintentional for simple loops
> 
> The clang 3.5 loop optimizer seems to kick in unintentionally for
> simple loops.
> 
> the very simple example
> 
> ----
> const int SIZE = 3;
> 
> int the_func(int* p_array)
> {
>     int dummy = 0;
> #if defined(ITER)
>     for(int* p = &p_array[0]; p < &p_array[SIZE]; ++p) dummy += *p;
> #else
>     for(int i = 0; i < SIZE; ++i) dummy += p_array[i];
> #endif
>     return dummy;
> }
> 
> int main(int argc, char** argv)
> {
>     int* array = new int[SIZE];
>     for(int i = 0; i < SIZE; ++i){ array[i] = *argv[i]; }
>     int dummy = the_func(array);
>     delete[] array;
>     return dummy;
> }
> ----
> 
> compiled with gcc 4.9.1 and clang 3.5
> 
> With clang 3.5 and ITER defined (-DITER), the_func contains a large amount of code.
> The code in main is also sometimes different from the_func (i.e. it is not
> just an inlined copy of the_func).
> 
> clang -DITER -O2
> clang -DITER -O3
> 
> gives:
> 
> the_func:
>       leaq    12(%rdi), %rcx
>       leaq    4(%rdi), %rax
>       cmpq    %rax, %rcx
>       cmovaq    %rcx, %rax
>       movq    %rdi, %rsi
>       notq    %rsi
>       addq    %rax, %rsi
>       shrq    $2, %rsi
>       incq    %rsi
>       xorl    %edx, %edx
>       movabsq    $9223372036854775800, %rax # imm = 0x7FFFFFFFFFFFFFF8
>       andq    %rsi, %rax
>       pxor    %xmm0, %xmm0
>       je    .LBB0_1
> # BB#2:                                 # %vector.body.preheader
>       leaq    (%rdi,%rax,4), %r8
>       addq    $16, %rdi
>       movq    %rsi, %rdx
>       andq    $-8, %rdx
>       pxor    %xmm0, %xmm0
>       pxor    %xmm1, %xmm1
>       .align    16, 0x90
> .LBB0_3:                                # %vector.body
>                                           # =>This Inner Loop Header: Depth=1
>       movdqa    %xmm1, %xmm2
>       movdqa    %xmm0, %xmm3
>       movdqu    -16(%rdi), %xmm0
>       movdqu    (%rdi), %xmm1
>       paddd    %xmm3, %xmm0
>       paddd    %xmm2, %xmm1
>       addq    $32, %rdi
>       addq    $-8, %rdx
>       jne    .LBB0_3
> # BB#4:
>       movq    %r8, %rdi
>       movq    %rax, %rdx
>       jmp    .LBB0_5
> .LBB0_1:
>       pxor    %xmm1, %xmm1
> .LBB0_5:                                # %middle.block
>       paddd    %xmm1, %xmm0
>       movdqa    %xmm0, %xmm1
>       movhlps    %xmm1, %xmm1            # xmm1 = xmm1[1,1]
>       paddd    %xmm0, %xmm1
>       pshufd    $1, %xmm1, %xmm0        # xmm0 = xmm1[1,0,0,0]
>       paddd    %xmm1, %xmm0
>       movd    %xmm0, %eax
>       cmpq    %rdx, %rsi
>       je    .LBB0_7
>       .align    16, 0x90
> .LBB0_6:                                # %scalar.ph
>                                           # =>This Inner Loop Header: Depth=1
>       addl    (%rdi), %eax
>       addq    $4, %rdi
>       cmpq    %rcx, %rdi
>       jb    .LBB0_6
> .LBB0_7:                                # %._crit_edge
>       retq
> 
> isn't that a little bit too long?
> 
> other better looking results:
> 
> clang -O2
> clang -O3
> gcc -O3
> gcc -DITER -O3
> 
> gives:
> 
> the_func:
>       movl    4(%rdi), %eax
>       addl    (%rdi), %eax
>       addl    8(%rdi), %eax
>       ret(q)
> 
> looks good
> 
> gcc -DITER -O2
> 
> gives:
> 
> the_func:
>       leaq    12(%rdi), %rdx
>       xorl    %eax, %eax
> .L2:
>       addl    (%rdi), %eax
>       addq    $4, %rdi
>       cmpq    %rdx, %rdi
>       jne    .L2
>       rep ret
> 
> looks good
> 
> 
> gcc4.9.1 seems to be more "stable" in its optimization for the_func
> and main
> 
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
> 

-- 
Hal Finkel
Assistant Computational Scientist
Leadership Computing Facility
Argonne National Laboratory