[libc-commits] [libc] [libc][__support][bit] Switch popcount to Brian Kernighan’s Algorithm (PR #95625)
Schrodinger ZHU Yifan via libc-commits
libc-commits at lists.llvm.org
Fri Jun 14 17:42:25 PDT 2024
SchrodingerZhu wrote:
Interesting: under clang `-O3 -march=x86-64-v3`,
```c++
// Counts the set bits in `value` by testing bit positions 0 through 31 one at a time.
// The loop always runs exactly 32 iterations, regardless of how many bits are set.
unsigned popcount(unsigned value) {
int count = 0;
for (int i = 0; i != 32; ++i)
// Shift bit i down to position 0 and test it.
if ((value >> i) & 0x1)
++count;
// `count` is an int; it is converted to the unsigned return type here.
return count;
}
// Counts the set bits in `value` by repeatedly clearing the lowest set bit
// (the Brian Kernighan method named in the PR title).
// The loop body runs once per set bit and exits as soon as `value` is zero.
unsigned popcount2(unsigned value) {
int count = 0;
while (value) {
// value - 1 flips the lowest set bit and every zero below it, so the AND
// removes exactly that one bit and leaves the higher bits unchanged.
value &= value - 1;
++count;
}
// `count` is an int; it is converted to the unsigned return type here.
return count;
}
```
compiles to
```
# Constant pool referenced by popcount(unsigned int) below.
# Each table is loaded RIP-relative as an operand of one vector instruction.

# .LCPI0_0: per-lane right-shift counts 1..4 (operand of the 128-bit vpsrlvd into xmm1).
.LCPI0_0:
.long 1 # 0x1
.long 2 # 0x2
.long 3 # 0x3
.long 4 # 0x4
# .LCPI0_2: lane indices [3,0,1,2]; broadcast to both 128-bit halves and used
# as the index vector of vpermd.
.LCPI0_2:
.long 3 # 0x3
.long 0 # 0x0
.long 1 # 0x1
.long 2 # 0x2
# .LCPI0_5: per-lane right-shift counts 24..27 (operand of the 128-bit vpsrlvd into xmm0).
.LCPI0_5:
.long 24 # 0x18
.long 25 # 0x19
.long 26 # 0x1a
.long 27 # 0x1b
# .LCPI0_1: per-lane right-shift counts 8..15 (operand of the 256-bit vpsrlvd into ymm2).
.LCPI0_1:
.long 8 # 0x8
.long 9 # 0x9
.long 10 # 0xa
.long 11 # 0xb
.long 12 # 0xc
.long 13 # 0xd
.long 14 # 0xe
.long 15 # 0xf
# .LCPI0_4: per-lane right-shift counts 16..23 (operand of the 256-bit vpsrlvd into ymm4).
.LCPI0_4:
.long 16 # 0x10
.long 17 # 0x11
.long 18 # 0x12
.long 19 # 0x13
.long 20 # 0x14
.long 21 # 0x15
.long 22 # 0x16
.long 23 # 0x17
# .LCPI0_3: the single value 1, broadcast to every lane and used as the low-bit mask.
.LCPI0_3:
.long 1 # 0x1
# popcount(unsigned int) -- clang's vectorized form of the 32-iteration bit-test loop.
# In:    edi = value
# Out:   eax = number of set bits in value
# Uses:  eax, ecx, edx, esi, ymm0-ymm4, flags
# Plan:  build vectors holding value >> k for k = 0..27, mask every lane down
#        to its low bit, sum the lanes, then add bits 28..31 with scalar code.
popcount(unsigned int): # @popcount(unsigned int)
# ymm0 = value replicated into all eight 32-bit lanes.
vmovd xmm0, edi
vpbroadcastd ymm0, xmm0
# xmm1 = [value>>1, value>>2, value>>3, value>>4].
vpsrlvd xmm1, xmm0, xmmword ptr [rip + .LCPI0_0]
# Scalar side work, interleaved with the vector shifts:
# eax = value>>5, ecx = value>>6, edx = value>>7.
mov eax, edi
# ymm2 = [value>>8 .. value>>15].
vpsrlvd ymm2, ymm0, ymmword ptr [rip + .LCPI0_1]
shr eax, 5
mov ecx, edi
shr ecx, 6
mov edx, edi
shr edx, 7
vbroadcasti128 ymm3, xmmword ptr [rip + .LCPI0_2] # ymm3 = [3,0,1,2,3,0,1,2]
# Rotate the four shifted values into place (only lanes 0-3 of ymm1 are read):
# ymm1 = [v>>4, v>>1, v>>2, v>>3, v>>4, v>>1, v>>2, v>>3].
vpermd ymm1, ymm3, ymm1
# Lane 0 <- value itself (the shift-by-0 term).
vpblendd ymm1, ymm1, ymm0, 1 # ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
# Lane 5 <- value>>5 (broadcast from eax, then blended in).
vmovd xmm3, eax
vpbroadcastd ymm3, xmm3
vpblendd ymm1, ymm1, ymm3, 32 # ymm1 = ymm1[0,1,2,3,4],ymm3[5],ymm1[6,7]
# Lane 6 <- value>>6 (from ecx).
vmovd xmm3, ecx
vpbroadcastd ymm3, xmm3
vpblendd ymm1, ymm1, ymm3, 64 # ymm1 = ymm1[0,1,2,3,4,5],ymm3[6],ymm1[7]
# Lane 7 <- value>>7 (from edx). After this, ymm1 = [value>>0 .. value>>7].
vmovd xmm3, edx
vpbroadcastd ymm3, xmm3
vpblendd ymm1, ymm1, ymm3, 128 # ymm1 = ymm1[0,1,2,3,4,5,6],ymm3[7]
vpbroadcastd ymm3, dword ptr [rip + .LCPI0_3] # ymm3 = [1,1,1,1,1,1,1,1]
# ymm4 = [value>>16 .. value>>23].
vpsrlvd ymm4, ymm0, ymmword ptr [rip + .LCPI0_4]
# Mask each lane to its low bit: ymm2 = bits 8..15, ymm1 = bits 0..7.
vpand ymm2, ymm2, ymm3
vpand ymm1, ymm1, ymm3
# ymm1 lane i = bit i + bit (i+8).
vpaddd ymm1, ymm1, ymm2
# ymm2 = bits 16..23, one per lane.
vpand ymm2, ymm4, ymm3
# xmm0 = bits 24..27, one per lane (shift by 24..27, then mask).
vpsrlvd xmm0, xmm0, xmmword ptr [rip + .LCPI0_5]
vpand xmm0, xmm0, xmm3
# Scalar handling of the top bits: eax = bit 29, ecx = bit 31.
mov eax, edi
shr eax, 29
and eax, 1
mov ecx, edi
shr ecx, 31
# Horizontal add of ymm1 (fold 8 -> 4 -> 2 -> 1 lanes): edx = count of bits 0..15.
vextracti128 xmm3, ymm1, 1
vpaddd xmm1, xmm1, xmm3
vpshufd xmm3, xmm1, 238 # xmm3 = xmm1[2,3,2,3]
vpaddd xmm1, xmm1, xmm3
vpshufd xmm3, xmm1, 85 # xmm3 = xmm1[1,1,1,1]
vpaddd xmm1, xmm1, xmm3
vmovd edx, xmm1
# Horizontal add of ymm2: esi = count of bits 16..23.
vextracti128 xmm1, ymm2, 1
vpaddd xmm1, xmm2, xmm1
vpshufd xmm2, xmm1, 238 # xmm2 = xmm1[2,3,2,3]
vpaddd xmm1, xmm1, xmm2
vpshufd xmm2, xmm1, 85 # xmm2 = xmm1[1,1,1,1]
vpaddd xmm1, xmm1, xmm2
vmovd esi, xmm1
# esi = count of bits 0..23.
add esi, edx
# Horizontal add of xmm0 (fold 4 -> 2 -> 1 lanes): edx = count of bits 24..27.
vpshufd xmm1, xmm0, 238 # xmm1 = xmm0[2,3,2,3]
vpaddd xmm0, xmm0, xmm1
vpshufd xmm1, xmm0, 85 # xmm1 = xmm0[1,1,1,1]
vpaddd xmm0, xmm0, xmm1
vmovd edx, xmm0
# bt copies bit 28 into CF; adc then folds it into the sum:
# edx = bits 24..27 + bits 0..23 + bit 28 = count of bits 0..28.
bt edi, 28
adc edx, esi
# Same trick for bit 30: eax = bit 29 + bit 30.
bt edi, 30
adc eax, 0
# eax = bits 29..31, then add the count of bits 0..28 for the final result.
add eax, ecx
add eax, edx
# Clear the upper halves of the ymm registers before returning to the caller.
vzeroupper
ret
# popcount2(unsigned int) -- the whole clear-lowest-set-bit loop is emitted as
# one hardware population-count instruction.
# In:  edi = value
# Out: eax = number of set bits in value
popcount2(unsigned int): # @popcount2(unsigned int)
popcnt eax, edi
ret
```
https://github.com/llvm/llvm-project/pull/95625
More information about the libc-commits
mailing list