[LLVMbugs] [Bug 23269] New: clang generates 1.5 slower loop code than gcc

bugzilla-daemon at llvm.org bugzilla-daemon at llvm.org
Fri Apr 17 08:43:54 PDT 2015


https://llvm.org/bugs/show_bug.cgi?id=23269

            Bug ID: 23269
           Summary: clang generates 1.5 slower loop code than gcc
           Product: clang
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: normal
          Priority: P
         Component: LLVM Codegen
          Assignee: unassignedclangbugs at nondot.org
          Reporter: dvyukov at google.com
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified

$ clang++ -v
clang version 3.7.0 (trunk 234143)
Target: x86_64-unknown-linux-gnu
$ g++ -v
Target: x86_64-linux-gnu
gcc version 4.8.2 (Ubuntu 4.8.2-19ubuntu1) 

Below is the test program.

Processor is Intel(R) Xeon(R) CPU E5-2690 0 @ 2.90GHz.

Build the program with:
$ g++/clang++ test.cc -Wall -O3 -msse3 -g

g++ compiled binary runs in 2.813s.
clang++ compiled binary runs in 4.353s.

===========
#include <stdlib.h>

// Raw byte type used for the two compared buffers.
typedef unsigned char byte;
// Global buffer pointers. The pointers themselves (not the pointed-to bytes)
// are volatile-qualified, so every access to arr1/arr2 reloads the pointer.
byte* volatile arr1;
byte* volatile arr2;

// Walks the first 1<<16 bytes of p1 and p2 in lockstep and reports two flags.
//
//   - If some index i has p1[i] == 0 while p2[i] != 0, both *f1 and *f2 are
//     set to true and the function returns immediately.
//   - Otherwise *f1 is set to false and *f2 is set to true iff at least one
//     index had p1[i] < p2[i].
//
// Both conditions are wrapped in __builtin_expect(..., 0), i.e. hinted as
// unlikely. The function is marked noinline so it stays a separate function
// (this is the loop whose generated code is profiled in this report).
__attribute__((noinline)) void compare(byte* p1, byte* p2, bool* f1, bool *f2)
{
    // Becomes true once any position with p1[i] < p2[i] has been seen.
    bool cnt = false;
    for (int i = 0; i < 1<<16; i++) {
        byte v1 = p1[i];
        byte v2 = p2[i];
        // Early exit: p1 has a zero byte where p2 has a non-zero byte.
        if (__builtin_expect(v1 == 0 && v2 != 0, 0)) {
            *f1 = true;
            *f2 = true;
            return;
        }
        // Unsigned byte comparison; only records the fact, keeps scanning.
        if (__builtin_expect(v1 < v2, 0)) {
            cnt = true;
        }
    }
    // Full scan completed without hitting the early-exit condition.
    *f1 = false;
    *f2 = cnt;
}

// Benchmark driver: builds two 1<<16-byte buffers and runs compare() over
// them 50000 times, returning the sum of the flags it produced.
int main() {
    // Two zero-initialized buffers of 1<<16 bytes each.
    // NOTE(review): the calloc results are not checked for NULL.
    arr1 = (byte*)calloc(1<<16, 1);
    arr2 = (byte*)calloc(1<<16, 1);
    // Write the value 100 at 1000 rand()-chosen indices. The same index is
    // written in both buffers, so the two buffers stay byte-for-byte equal;
    // neither condition inside compare() can then be true for this data.
    for (int i = 0; i < 1000; i++) {
        int idx = rand() % (1<<16);
        arr1[idx] = 100;
        arr2[idx] = 100;
    }
    // Accumulates the flags from every call and is returned as the exit
    // status (presumably so the results are observably used — confirm).
    int x = 0;
    for (int i = 0; i < 50000; i++) {
        bool f1, f2;
        compare(arr1, arr2, &f1, &f2);
        x += f1;
        x += f2;
    }
    return x;
}
=====

g++-compiled binary profile:

       │    0000000000400630 <compare(unsigned char*, unsigned char*, bool*, bool*)>:
       │      xor    %eax,%eax
       │      xor    %r9d,%r9d
       │      mov    $0x1,%r11d
       │      nop
  0.02 │10:   movzbl (%rsi,%rax,1),%r8d
  8.05 │      movzbl (%rdi,%rax,1),%r10d
 29.73 │      test   %r8b,%r8b
       │      jne    39
  6.48 │1f:   cmp    %r8b,%r10b
  9.31 │      cmovb  %r11d,%r9d
 43.24 │      add    $0x1,%rax
  0.03 │      cmp    $0x10000,%rax
       │      jne    10
       │      movb   $0x0,(%rdx)
       │      mov    %r9b,(%rcx)
       │      retq
  3.14 │39:   test   %r10b,%r10b
       │      jne    1f
       │      movb   $0x1,(%rdx)
       │      movb   $0x1,(%rcx)
       │      retq


clang++-compiled profile:


       │    0000000000400640 <compare(unsigned char*, unsigned char*, bool*, bool*)>:
       │      xor    %r11d,%r11d
       │      xor    %r8d,%r8d
       │      nop
  4.14 │10:   mov    (%rdi,%r11,1),%r9b
 12.93 │      mov    (%rsi,%r11,1),%r10b
  6.93 │      test   %r9b,%r9b
       │      jne    22
  4.11 │      test   %r10b,%r10b
       │      jne    76
  5.82 │22:   movzbl %r10b,%r10d
  1.43 │      movzbl %r9b,%eax
  4.40 │      cmp    %r10d,%eax
  8.51 │      mov    $0x1,%r9b
  1.44 │   ┌──jb     35
  7.17 │   │  mov    %r8b,%r9b
  1.63 │35:└─ mov    0x1(%rdi,%r11,1),%r8b
  2.84 │      mov    0x1(%rsi,%r11,1),%r10b
  8.69 │      test   %r8b,%r8b
       │      jne    49
  1.80 │      test   %r10b,%r10b
       │      jne    76
  3.94 │49:   inc    %r11
  5.29 │      movzbl %r10b,%r10d
  2.14 │      movzbl %r8b,%eax
  1.18 │      cmp    %r10d,%eax
  4.64 │      mov    $0x1,%r8b
  3.93 │      jb     5f
  3.83 │      mov    %r9b,%r8b
  1.58 │5f:   inc    %r11
  1.62 │      cmp    $0x10000,%r11
       │      jl     10
       │      movb   $0x0,(%rdx)
       │      and    $0x1,%r8b
       │      mov    %r8b,(%rcx)
       │      retq
       │76:   movb   $0x1,(%rdx)
       │      mov    $0x1,%r8b
       │      mov    %r8b,(%rcx)
       │      retq

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20150417/153fed98/attachment.html>


More information about the llvm-bugs mailing list