<html>

    <head>

      <base href="http://llvm.org/bugs/" />

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - the clang 3.5 loop optimizer seems to jump in unintentional for simple loops"

   href="http://llvm.org/bugs/show_bug.cgi?id=20409">20409</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>the clang 3.5 loop optimizer seems to jump in unintentional for simple loops

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>new-bugs

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Windows NT

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>normal

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>new bugs

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>dl.soluz@gmx.net

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvmbugs@cs.uiuc.edu

          </td>

        </tr>

        <tr>

          <th>Classification</th>

          <td>Unclassified

          </td>

        </tr></table>

      <p>

        <div>

        <pre>----

const int SIZE = 3;

int the_func(int* p_array)

{

    int dummy = 0;

#if defined(ITER)

    for(int* p = &amp;p_array[0]; p &lt; &amp;p_array[SIZE]; ++p) dummy += *p;

#else

    for(int i = 0; i &lt; SIZE; ++i) dummy += p_array[i];

#endif

    return dummy;

}

int main(int argc, char** argv)

{

    int* array = new int[SIZE];

    for(int i = 0; i &lt; SIZE; ++i){ array[i] = *argv[i]; }

    int dummy = the_func(array);

    delete[] array;

    return dummy;

}

----

compiled with gcc 4.9.1 and clang 3.5

with clang3.5 + #define ITER the_func contains masses of code

the code in main is also sometimes different to an inline the_func

(the loop vectorizer seems to be more active in the_func than main)

clang -DITER -O2

clang -DITER -O3

gives:

the_func:

      leaq    12(%rdi), %rcx

      leaq    4(%rdi), %rax

      cmpq    %rax, %rcx

      cmovaq    %rcx, %rax

      movq    %rdi, %rsi

      notq    %rsi

      addq    %rax, %rsi

      shrq    $2, %rsi

      incq    %rsi

      xorl    %edx, %edx

      movabsq    $9223372036854775800, %rax # imm = 0x7FFFFFFFFFFFFFF8

      andq    %rsi, %rax

      pxor    %xmm0, %xmm0

      je    .LBB0_1

# BB#2:                                 # %vector.body.preheader

      leaq    (%rdi,%rax,4), %r8

      addq    $16, %rdi

      movq    %rsi, %rdx

      andq    $-8, %rdx

      pxor    %xmm0, %xmm0

      pxor    %xmm1, %xmm1

      .align    16, 0x90

.LBB0_3:                                # %vector.body

                                          # =>This Inner Loop Header: Depth=1

      movdqa    %xmm1, %xmm2

      movdqa    %xmm0, %xmm3

      movdqu    -16(%rdi), %xmm0

      movdqu    (%rdi), %xmm1

      paddd    %xmm3, %xmm0

      paddd    %xmm2, %xmm1

      addq    $32, %rdi

      addq    $-8, %rdx

      jne    .LBB0_3

# BB#4:

      movq    %r8, %rdi

      movq    %rax, %rdx

      jmp    .LBB0_5

.LBB0_1:

      pxor    %xmm1, %xmm1

.LBB0_5:                                # %middle.block

      paddd    %xmm1, %xmm0

      movdqa    %xmm0, %xmm1

      movhlps    %xmm1, %xmm1            # xmm1 = xmm1[1,1]

      paddd    %xmm0, %xmm1

      pshufd    $1, %xmm1, %xmm0        # xmm0 = xmm1[1,0,0,0]

      paddd    %xmm1, %xmm0

      movd    %xmm0, %eax

      cmpq    %rdx, %rsi

      je    .LBB0_7

      .align    16, 0x90

.LBB0_6:                                # %scalar.ph

                                          # =>This Inner Loop Header: Depth=1

      addl    (%rdi), %eax

      addq    $4, %rdi

      cmpq    %rcx, %rdi

      jb    .LBB0_6

.LBB0_7:                                # %._crit_edge

      retq

isn't that a little bit too long?

other better looking results:

clang -O2

clang -O3

gcc -O3

gcc -DITER -O3

gives:

the_func:

      movl    4(%rdi), %eax

      addl    (%rdi), %eax

      addl    8(%rdi), %eax

      ret(q)

looks good

gcc -DITER -O2

gives:

the_func:

      leaq    12(%rdi), %rdx

      xorl    %eax, %eax

.L2:

      addl    (%rdi), %eax

      addq    $4, %rdi

      cmpq    %rdx, %rdi

      jne    .L2

      rep ret

looks good

gcc4.9.1 seems to be more "stable" in its optimization for the_func and main</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>