<html>
    <head>
      <base href="https://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - clang generates 1.5 slower loop code than gcc"
   href="https://llvm.org/bugs/show_bug.cgi?id=23269">23269</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>clang generates 1.5 slower loop code than gcc
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>LLVM Codegen
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>dvyukov@google.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvmbugs@cs.uiuc.edu
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>$ clang++ -v
clang version 3.7.0 (trunk 234143)
Target: x86_64-unknown-linux-gnu
$ g++ -v
Target: x86_64-linux-gnu
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1) 

Below is the test program.

Processor is Intel(R) Xeon(R) CPU E5-2690 0 @ 2.90GHz.

Build the program with:
$ g++/clang++ test.cc -Wall -O3 -msse3 -g

g++ compiled binary runs 2.813s.
clang++ compiled binary runs 4.353s.

===========
#include &lt;stdlib.h&gt;

typedef unsigned char byte;
byte* volatile arr1;
byte* volatile arr2;

__attribute__((noinline)) void compare(byte* p1, byte* p2, bool* f1, bool *f2)
{
    bool cnt = false;
    for (int i = 0; i < 1<<16; i++) {
        byte v1 = p1[i];
        byte v2 = p2[i];
        if (__builtin_expect(v1 == 0 && v2 != 0, 0)) {
            *f1 = true;
            *f2 = true;
            return;
        }
        if (__builtin_expect(v1 < v2, 0)) {
            cnt = true;
        }
    }
    *f1 = false;
    *f2 = cnt;
}

int main() {
    arr1 = (byte*)calloc(1<<16, 1);
    arr2 = (byte*)calloc(1<<16, 1);
    for (int i = 0; i < 1000; i++) {
        int idx = rand() % (1<<16);
        arr1[idx] = 100;
        arr2[idx] = 100;
    }
    int x = 0;
    for (int i = 0; i < 50000; i++) {
        bool f1, f2;
        compare(arr1, arr2, &f1, &f2);
        x += f1;
        x += f2;
    }
    return x;
}
=====

g++-compiled binary profile:

       │    0000000000400630 &lt;compare(unsigned char*, unsigned char*, bool*, bool*)&gt;:
       │      xor    %eax,%eax
       │      xor    %r9d,%r9d
       │      mov    $0x1,%r11d
       │      nop
  0.02 │10:   movzbl (%rsi,%rax,1),%r8d
  8.05 │      movzbl (%rdi,%rax,1),%r10d
 29.73 │      test   %r8b,%r8b
       │      jne    39
  6.48 │1f:   cmp    %r8b,%r10b
  9.31 │      cmovb  %r11d,%r9d
 43.24 │      add    $0x1,%rax
  0.03 │      cmp    $0x10000,%rax
       │      jne    10
       │      movb   $0x0,(%rdx)
       │      mov    %r9b,(%rcx)
       │      retq
  3.14 │39:   test   %r10b,%r10b
       │      jne    1f
       │      movb   $0x1,(%rdx)
       │      movb   $0x1,(%rcx)
       │      retq


clang++-compiled profile:


       │    0000000000400640 &lt;compare(unsigned char*, unsigned char*, bool*, bool*)&gt;:
       │      xor    %r11d,%r11d
       │      xor    %r8d,%r8d
       │      nop
  4.14 │10:   mov    (%rdi,%r11,1),%r9b
 12.93 │      mov    (%rsi,%r11,1),%r10b
  6.93 │      test   %r9b,%r9b
       │      jne    22
  4.11 │      test   %r10b,%r10b
       │      jne    76
  5.82 │22:   movzbl %r10b,%r10d
  1.43 │      movzbl %r9b,%eax
  4.40 │      cmp    %r10d,%eax
  8.51 │      mov    $0x1,%r9b
  1.44 │   ┌──jb     35
  7.17 │   │  mov    %r8b,%r9b
  1.63 │35:└─ mov    0x1(%rdi,%r11,1),%r8b
  2.84 │      mov    0x1(%rsi,%r11,1),%r10b
  8.69 │      test   %r8b,%r8b
       │      jne    49
  1.80 │      test   %r10b,%r10b
       │      jne    76
  3.94 │49:   inc    %r11
  5.29 │      movzbl %r10b,%r10d
  2.14 │      movzbl %r8b,%eax
  1.18 │      cmp    %r10d,%eax
  4.64 │      mov    $0x1,%r8b
  3.93 │      jb     5f
  3.83 │      mov    %r9b,%r8b
  1.58 │5f:   inc    %r11
  1.62 │      cmp    $0x10000,%r11
       │      jl     10
       │      movb   $0x0,(%rdx)
       │      and    $0x1,%r8b
       │      mov    %r8b,(%rcx)
       │      retq
       │76:   movb   $0x1,(%rdx)
       │      mov    $0x1,%r8b
       │      mov    %r8b,(%rcx)
       │      retq</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>