[LLVMbugs] [Bug 17101] New: popcnt not generated
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Wed Sep 4 11:13:45 PDT 2013
http://llvm.org/bugs/show_bug.cgi?id=17101
Bug ID: 17101
Summary: popcnt not generated
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: normal
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: kkhoo at perfwizard.com
CC: llvmbugs at cs.uiuc.edu
Classification: Unclassified
$ ./clang -v
clang version 3.4 (trunk 189776)
Target: x86_64-apple-darwin11.4.2
Thread model: posix
$ cat popcnt.c
unsigned int foo(unsigned int x) {
unsigned int countOfOnes = 0;
unsigned int i;
for (i=0; i<32; i++) {
if (((x >> i) & 0x1) == 1) countOfOnes++;
}
return countOfOnes;
}
$ ./clang -S -O3 -march=core-avx2 popcnt.c -o /dev/stdout
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__const
.align 5
LCPI0_0:
.long 1 ## 0x1
.long 2 ## 0x2
.long 4 ## 0x4
.long 8 ## 0x8
.long 16 ## 0x10
.long 32 ## 0x20
.long 64 ## 0x40
.long 128 ## 0x80
LCPI0_2:
.long 256 ## 0x100
.long 512 ## 0x200
.long 1024 ## 0x400
.long 2048 ## 0x800
.long 4096 ## 0x1000
.long 8192 ## 0x2000
.long 16384 ## 0x4000
.long 32768 ## 0x8000
LCPI0_3:
.long 16777216 ## 0x1000000
.long 33554432 ## 0x2000000
.long 67108864 ## 0x4000000
.long 134217728 ## 0x8000000
.long 268435456 ## 0x10000000
.long 536870912 ## 0x20000000
.long 1073741824 ## 0x40000000
.long 2147483648 ## 0x80000000
LCPI0_4:
.long 65536 ## 0x10000
.long 131072 ## 0x20000
.long 262144 ## 0x40000
.long 524288 ## 0x80000
.long 1048576 ## 0x100000
.long 2097152 ## 0x200000
.long 4194304 ## 0x400000
.long 8388608 ## 0x800000
.section __TEXT,__literal4,4byte_literals
.align 2
LCPI0_1:
.long 1 ## 0x1
.section __TEXT,__text,regular,pure_instructions
.globl _foo
.align 4, 0x90
_foo: ## @foo
.cfi_startproc
## BB#0: ## %for.end
pushq %rbp
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp4:
.cfi_def_cfa_register %rbp
vmovd %edi, %xmm0
vbroadcastss %xmm0, %ymm0
vandps LCPI0_0(%rip), %ymm0, %ymm1
vpxor %ymm2, %ymm2, %ymm2
vpcmpeqd %ymm2, %ymm1, %ymm1
vpbroadcastd LCPI0_1(%rip), %ymm3
vpandn %ymm3, %ymm1, %ymm1
vandps LCPI0_2(%rip), %ymm0, %ymm4
vpcmpeqd %ymm2, %ymm4, %ymm4
vpandn %ymm3, %ymm4, %ymm4
vpaddd %ymm1, %ymm4, %ymm1
vandps LCPI0_3(%rip), %ymm0, %ymm4
vpcmpeqd %ymm2, %ymm4, %ymm4
vpandn %ymm3, %ymm4, %ymm4
vandps LCPI0_4(%rip), %ymm0, %ymm0
vpcmpeqd %ymm2, %ymm0, %ymm0
vpandn %ymm3, %ymm0, %ymm0
vpaddd %ymm1, %ymm0, %ymm0
vpaddd %ymm0, %ymm4, %ymm0
vextracti128 $1, %ymm0, %xmm1
vpaddd %ymm1, %ymm0, %ymm0
vpalignr $8, %ymm0, %ymm0, %ymm1 ## ymm1 = ymm0[8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31,16,17,18,19,20,21,22,23]
vpaddd %ymm1, %ymm0, %ymm0
vphaddd %ymm0, %ymm0, %ymm0
vmovd %xmm0, %eax
popq %rbp
vzeroupper
ret
It looks like clang/llvm did something heroic here, recognizing that this is a
popcount function and using AVX2 to avoid the loop, but I was expecting this
loop to generate a simple popcnt instruction. I think popcnt has been available
since Nehalem for Intel and Family 10H for AMD.
Better codegen would be something like this:
popcnt %edi, %eax
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20130904/657f67a3/attachment.html>
More information about the llvm-bugs
mailing list