<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - GCC produces much better code for simple double loops than Clang"
   href="https://bugs.llvm.org/show_bug.cgi?id=33757">33757</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>GCC produces much better code for simple double loops than Clang
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Loop Optimizer
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>gonzalobg88@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>See the code and assembly comparison at the link below; the source code and generated assembly follow:

<a href="https://gcc.godbolt.org/#g:!((g:!((g:!((h:codeEditor,i:(fontScale:1.1,j:1,source:'%23include+%3Cstdint.h%3E%0A%0Aint64_t+c_iter(int64_t+high)%0A%7B+++++++%0A++int64_t+total+%3D+0%3B%0A++int64_t+i%3B%0A++for+(i+%3D+1%3B+i+%3C%3D+high%3B+i%2B%2B)+%7B%0A++++while+(i+%25+2+!!%3D+0)+%7B%0A++++++i%2B%2B%3B%0A++++%7D%0A++++total+%2B%3D+i+*+2%3B%0A++%7D%0A++return+total%3B%0A%7D%0A%0Aint64_t+c_loop(int64_t+high)+%7B%0A++int64_t+total+%3D+0%3B%0A++int64_t+i%3B%0A++for+(i+%3D+1%3B+i+%3C%3D+high%3B+%2B%2Bi)+%7B%0A++++if+(i+%25+2+%3D%3D+0)+%7B%0A++++++total+%2B%3D+i+*+2%3B%0A++++%7D%0A++%7D%0A++return+total%3B%0A%7D'),l:'5',n:'0',o:'C%2B%2B+source+%231',t:'0')),k:33.03381678958197,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:clang400,filters:(b:'0',commentOnly:'0',directives:'0',intel:'0'),options:'-O2+-fno-unroll-loops',source:1),l:'5',n:'0',o:'x86-64+clang+4.0.0+(Editor+%231,+Compiler+%231)',t:'0')),k:33.632849877084716,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:g71,filters:(b:'0',commentOnly:'0',directives:'0',intel:'0'),options:'-O2',source:1),l:'5',n:'0',o:'x86-64+gcc+7.1+(Editor+%231,+Compiler+%232)',t:'0')),k:33.33333333333333,l:'4',n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4">https://gcc.godbolt.org/#g:!((g:!((g:!((h:codeEditor,i:(fontScale:1.1,j:1,source:'%23include+%3Cstdint.h%3E%0A%0Aint64_t+c_iter(int64_t+high)%0A%7B+++++++%0A++int64_t+total+%3D+0%3B%0A++int64_t+i%3B%0A++for+(i+%3D+1%3B+i+%3C%3D+high%3B+i%2B%2B)+%7B%0A++++while+(i+%25+2+!!%3D+0)+%7B%0A++++++i%2B%2B%3B%0A++++%7D%0A++++total+%2B%3D+i+*+2%3B%0A++%7D%0A++return+total%3B%0A%7D%0A%0Aint64_t+c_loop(int64_t+high)+%7B%0A++int64_t+total+%3D+0%3B%0A++int64_t+i%3B%0A++for+(i+%3D+1%3B+i+%3C%3D+high%3B+%2B%2Bi)+%7B%0A++++if+(i+%25+2+%3D%3D+0)+%7B%0A++++++total+%2B%3D+i+*+2%3B%0A++++%7D%0A++%7D%0A++return+total%3B%0A%7D'),l:'5',n:'0',o:'C%2B%2B+source+%231',t:'0')),k:33.03381678958197,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:clang400,filters:(b:'0',c
ommentOnly:'0',directives:'0',intel:'0'),options:'-O2+-fno-unroll-loops',source:1),l:'5',n:'0',o:'x86-64+clang+4.0.0+(Editor+%231,+Compiler+%231)',t:'0')),k:33.632849877084716,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:g71,filters:(b:'0',commentOnly:'0',directives:'0',intel:'0'),options:'-O2',source:1),l:'5',n:'0',o:'x86-64+gcc+7.1+(Editor+%231,+Compiler+%232)',t:'0')),k:33.33333333333333,l:'4',n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4</a>

Code:

#include &lt;stdint.h&gt;

int64_t c_iter(int64_t high)
{       
  int64_t total = 0;
  int64_t i;
  for (i = 1; i &lt;= high; i++) {
    while (i % 2 != 0) {
      i++;
    }
    total += i * 2;
  }
  return total;
}

int64_t c_loop(int64_t high) {
  int64_t total = 0;
  int64_t i;
  for (i = 1; i &lt;= high; ++i) {
    if (i % 2 == 0) {
      total += i * 2;
    }
  }
  return total;
}

GCC assembly:

c_iter(long):
        test    rdi, rdi
        jle     .L4
        sub     rdi, 1
        mov     edx, 4
        xor     eax, eax
        shr     rdi
        lea     rcx, [8+rdi*4]
.L3:
        add     rax, rdx
        add     rdx, 4
        cmp     rcx, rdx
        jne     .L3
        rep ret
.L4:
        xor     eax, eax
        ret
c_loop(long):
        test    rdi, rdi
        jle     .L12
        add     rdi, 1
        mov     edx, 1
        xor     eax, eax
        jmp     .L9
.L11:
        lea     rcx, [rax+rdx*2]
        test    dl, 1
        cmove   rax, rcx
.L9:
        add     rdx, 1
        cmp     rdx, rdi
        jne     .L11
        rep ret
.L12:
        xor     eax, eax
        ret

Clang assembly: -O2 -funroll-loops

c_iter(long):                             # @c_iter(long)
        xor     eax, eax
        test    rdi, rdi
        jle     .LBB0_5
        mov     ecx, 1
.LBB0_2:                                # =>This Loop Header: Depth=1
        lea     rax, [rax + 2*rcx - 2]
.LBB0_3:                                #   Parent Loop BB0_2 Depth=1
        mov     edx, ecx
        inc     rcx
        add     rax, 2
        test    dl, 1
        jne     .LBB0_3
        lea     rdx, [rcx - 1]
        cmp     rdx, rdi
        jl      .LBB0_2
.LBB0_5:
        ret

c_loop(long):                             # @c_loop(long)
        test    rdi, rdi
        jle     .LBB1_1
        lea     r8, [rdi - 1]
        mov     r9, rdi
        and     r9, 3
        je      .LBB1_3
        xor     edx, edx
        mov     ecx, 2
        xor     eax, eax
.LBB1_5:                                # =>This Inner Loop Header: Depth=1
        inc     rdx
        test    dl, 1
        mov     esi, 0
        cmove   rsi, rcx
        add     rax, rsi
        add     rcx, 2
        cmp     r9, rdx
        jne     .LBB1_5
        inc     rdx
        cmp     r8, 3
        jae     .LBB1_8
        jmp     .LBB1_10
.LBB1_1:
        xor     eax, eax
        ret
.LBB1_3:
        xor     eax, eax
        mov     edx, 1
        cmp     r8, 3
        jb      .LBB1_10
.LBB1_8:
        neg     rdi
        lea     rcx, [rdx + rdx + 6]
        lea     rdx, [rdx + 3]
        xor     r8d, r8d
.LBB1_9:                                # =>This Inner Loop Header: Depth=1
        lea     r9d, [rdx - 3]
        lea     rsi, [rcx - 6]
        test    r9b, 1
        cmovne  rsi, r8
        add     rsi, rax
        lea     r9d, [rdx - 2]
        lea     rax, [rcx - 4]
        test    r9b, 1
        cmovne  rax, r8
        add     rax, rsi
        lea     r9d, [rdx - 1]
        lea     rsi, [rcx - 2]
        test    r9b, 1
        cmovne  rsi, r8
        add     rsi, rax
        test    dl, 1
        mov     eax, 0
        cmove   rax, rcx
        add     rax, rsi
        lea     rsi, [rdi + rdx + 4]
        add     rdx, 4
        add     rcx, 8
        cmp     rsi, 4
        jne     .LBB1_9
.LBB1_10:
        ret

Clang assembly: -O2 -fno-unroll-loops

c_iter(long):                             # @c_iter(long)
        xor     eax, eax
        test    rdi, rdi
        jle     .LBB0_5
        mov     ecx, 1
.LBB0_2:                                # =>This Loop Header: Depth=1
        lea     rax, [rax + 2*rcx - 2]
.LBB0_3:                                #   Parent Loop BB0_2 Depth=1
        mov     edx, ecx
        inc     rcx
        add     rax, 2
        test    dl, 1
        jne     .LBB0_3
        lea     rdx, [rcx - 1]
        cmp     rdx, rdi
        jl      .LBB0_2
.LBB0_5:
        ret

c_loop(long):                             # @c_loop(long)
        test    rdi, rdi
        jle     .LBB1_1
        xor     ecx, ecx
        mov     edx, 2
        xor     eax, eax
.LBB1_3:                                # =>This Inner Loop Header: Depth=1
        inc     rcx
        test    cl, 1
        mov     esi, 0
        cmove   rsi, rdx
        add     rax, rsi
        add     rdx, 2
        cmp     rdi, rcx
        jne     .LBB1_3
        ret
.LBB1_1:
        xor     eax, eax
        ret</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>