<html>
<head>
<base href="http://llvm.org/bugs/" />
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW --- - popcnt not generated"
href="http://llvm.org/bugs/show_bug.cgi?id=17101">17101</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>popcnt not generated
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: X86
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>kkhoo@perfwizard.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvmbugs@cs.uiuc.edu
</td>
</tr>
<tr>
<th>Classification</th>
<td>Unclassified
</td>
</tr></table>
<p>
<div>
<pre>$ ./clang -v
clang version 3.4 (trunk 189776)
Target: x86_64-apple-darwin11.4.2
Thread model: posix
$ cat popcnt.c
unsigned int foo(unsigned int x) {
unsigned int countOfOnes = 0;
unsigned int i;
for (i=0; i&lt;32; i++) {
if (((x &gt;&gt; i) &amp; 0x1) == 1) countOfOnes++;
}
return countOfOnes;
}
$ ./clang -S -O3 -march=core-avx2 popcnt.c -o /dev/stdout
.section __TEXT,__text,regular,pure_instructions
.section __TEXT,__const
.align 5
LCPI0_0:
.long 1 ## 0x1
.long 2 ## 0x2
.long 4 ## 0x4
.long 8 ## 0x8
.long 16 ## 0x10
.long 32 ## 0x20
.long 64 ## 0x40
.long 128 ## 0x80
LCPI0_2:
.long 256 ## 0x100
.long 512 ## 0x200
.long 1024 ## 0x400
.long 2048 ## 0x800
.long 4096 ## 0x1000
.long 8192 ## 0x2000
.long 16384 ## 0x4000
.long 32768 ## 0x8000
LCPI0_3:
.long 16777216 ## 0x1000000
.long 33554432 ## 0x2000000
.long 67108864 ## 0x4000000
.long 134217728 ## 0x8000000
.long 268435456 ## 0x10000000
.long 536870912 ## 0x20000000
.long 1073741824 ## 0x40000000
.long 2147483648 ## 0x80000000
LCPI0_4:
.long 65536 ## 0x10000
.long 131072 ## 0x20000
.long 262144 ## 0x40000
.long 524288 ## 0x80000
.long 1048576 ## 0x100000
.long 2097152 ## 0x200000
.long 4194304 ## 0x400000
.long 8388608 ## 0x800000
.section __TEXT,__literal4,4byte_literals
.align 2
LCPI0_1:
.long 1 ## 0x1
.section __TEXT,__text,regular,pure_instructions
.globl _foo
.align 4, 0x90
_foo: ## @foo
.cfi_startproc
## BB#0: ## %for.end
pushq %rbp
Ltmp2:
.cfi_def_cfa_offset 16
Ltmp3:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp4:
.cfi_def_cfa_register %rbp
vmovd %edi, %xmm0
vbroadcastss %xmm0, %ymm0
vandps LCPI0_0(%rip), %ymm0, %ymm1
vpxor %ymm2, %ymm2, %ymm2
vpcmpeqd %ymm2, %ymm1, %ymm1
vpbroadcastd LCPI0_1(%rip), %ymm3
vpandn %ymm3, %ymm1, %ymm1
vandps LCPI0_2(%rip), %ymm0, %ymm4
vpcmpeqd %ymm2, %ymm4, %ymm4
vpandn %ymm3, %ymm4, %ymm4
vpaddd %ymm1, %ymm4, %ymm1
vandps LCPI0_3(%rip), %ymm0, %ymm4
vpcmpeqd %ymm2, %ymm4, %ymm4
vpandn %ymm3, %ymm4, %ymm4
vandps LCPI0_4(%rip), %ymm0, %ymm0
vpcmpeqd %ymm2, %ymm0, %ymm0
vpandn %ymm3, %ymm0, %ymm0
vpaddd %ymm1, %ymm0, %ymm0
vpaddd %ymm0, %ymm4, %ymm0
vextracti128 $1, %ymm0, %xmm1
vpaddd %ymm1, %ymm0, %ymm0
vpalignr $8, %ymm0, %ymm0, %ymm1 ## ymm1 = ymm0[8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,24,25,26,27,28,29,30,31,16,17,18,19,20,21,22,23]
vpaddd %ymm1, %ymm0, %ymm0
vphaddd %ymm0, %ymm0, %ymm0
vmovd %xmm0, %eax
popq %rbp
vzeroupper
ret
It looks like clang/llvm did something heroic here: it recognized that this is a
popcount function and used AVX2 to avoid the loop. However, I was expecting this
loop to generate a simple popcnt instruction, which I think has been available
since Nehalem for Intel and Family 10H for AMD.
Better codegen would be something like this:
popcnt %edi, %eax</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>