[LLVMbugs] [Bug 17101] New: popcnt not generated
    bugzilla-daemon at llvm.org 
    bugzilla-daemon at llvm.org
       
    Wed Sep  4 11:13:45 PDT 2013
    
    
  
http://llvm.org/bugs/show_bug.cgi?id=17101
            Bug ID: 17101
           Summary: popcnt not generated
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: kkhoo at perfwizard.com
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified
$ ./clang -v
clang version 3.4 (trunk 189776)
Target: x86_64-apple-darwin11.4.2
Thread model: posix
$ cat popcnt.c 
unsigned int foo(unsigned int x) {
    unsigned int countOfOnes = 0;
    unsigned int i;
    for (i=0; i<32; i++) {
        if (((x >> i) & 0x1) == 1) countOfOnes++;
    }
    return countOfOnes;
}
$ ./clang -S -O3 -march=core-avx2 popcnt.c -o /dev/stdout 
    .section    __TEXT,__text,regular,pure_instructions
    .section    __TEXT,__const
    .align    5
LCPI0_0:
    .long    1                       ## 0x1
    .long    2                       ## 0x2
    .long    4                       ## 0x4
    .long    8                       ## 0x8
    .long    16                      ## 0x10
    .long    32                      ## 0x20
    .long    64                      ## 0x40
    .long    128                     ## 0x80
LCPI0_2:
    .long    256                     ## 0x100
    .long    512                     ## 0x200
    .long    1024                    ## 0x400
    .long    2048                    ## 0x800
    .long    4096                    ## 0x1000
    .long    8192                    ## 0x2000
    .long    16384                   ## 0x4000
    .long    32768                   ## 0x8000
LCPI0_3:
    .long    16777216                ## 0x1000000
    .long    33554432                ## 0x2000000
    .long    67108864                ## 0x4000000
    .long    134217728               ## 0x8000000
    .long    268435456               ## 0x10000000
    .long    536870912               ## 0x20000000
    .long    1073741824              ## 0x40000000
    .long    2147483648              ## 0x80000000
LCPI0_4:
    .long    65536                   ## 0x10000
    .long    131072                  ## 0x20000
    .long    262144                  ## 0x40000
    .long    524288                  ## 0x80000
    .long    1048576                 ## 0x100000
    .long    2097152                 ## 0x200000
    .long    4194304                 ## 0x400000
    .long    8388608                 ## 0x800000
    .section    __TEXT,__literal4,4byte_literals
    .align    2
LCPI0_1:
    .long    1                       ## 0x1
    .section    __TEXT,__text,regular,pure_instructions
    .globl    _foo
    .align    4, 0x90
_foo:                                   ## @foo
    .cfi_startproc
## BB#0:                                ## %for.end
    pushq    %rbp
Ltmp2:
    .cfi_def_cfa_offset 16
Ltmp3:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
Ltmp4:
    .cfi_def_cfa_register %rbp
    vmovd    %edi, %xmm0
    vbroadcastss    %xmm0, %ymm0
    vandps    LCPI0_0(%rip), %ymm0, %ymm1
    vpxor    %ymm2, %ymm2, %ymm2
    vpcmpeqd    %ymm2, %ymm1, %ymm1
    vpbroadcastd    LCPI0_1(%rip), %ymm3
    vpandn    %ymm3, %ymm1, %ymm1
    vandps    LCPI0_2(%rip), %ymm0, %ymm4
    vpcmpeqd    %ymm2, %ymm4, %ymm4
    vpandn    %ymm3, %ymm4, %ymm4
    vpaddd    %ymm1, %ymm4, %ymm1
    vandps    LCPI0_3(%rip), %ymm0, %ymm4
    vpcmpeqd    %ymm2, %ymm4, %ymm4
    vpandn    %ymm3, %ymm4, %ymm4
    vandps    LCPI0_4(%rip), %ymm0, %ymm0
    vpcmpeqd    %ymm2, %ymm0, %ymm0
    vpandn    %ymm3, %ymm0, %ymm0
    vpaddd    %ymm1, %ymm0, %ymm0
    vpaddd    %ymm0, %ymm4, %ymm0
    vextracti128    $1, %ymm0, %xmm1
    vpaddd    %ymm1, %ymm0, %ymm0
    vpalignr    $8, %ymm0, %ymm0, %ymm1 ## ymm1 = ymm0[8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31,16,17,18,19,20,21,22,23]
    vpaddd    %ymm1, %ymm0, %ymm0
    vphaddd    %ymm0, %ymm0, %ymm0
    vmovd    %xmm0, %eax
    popq    %rbp
    vzeroupper
    ret
It looks like clang/llvm did something heroic here: it recognized that this is a
popcount function and used AVX2 to avoid the loop. However, I was expecting this
loop to generate a single popcnt instruction, which I believe has been available
since Nehalem for Intel and Family 10h for AMD.
Better codegen would be something like this:
popcnt %edi, %eax
-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20130904/657f67a3/attachment.html>
    
    
More information about the llvm-bugs
mailing list