[libc-commits] [libc] [libc][__support][bit] Switch popcount to Brian Kernighan’s Algorithm (PR #95625)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Fri Jun 14 17:42:25 PDT 2024


SchrodingerZhu wrote:

Interesting: under clang with `-O3 -march=x86-64-v3`,

```c++
// Counts the set bits of `value` by testing bit positions 0 through 31 one
// at a time. The loop always runs 32 iterations, independent of the input.
unsigned popcount(unsigned value) {
  int count = 0;
  for (int i = 0; i != 32; ++i)
    if ((value >> i) & 0x1)  // bit i of value is set
      ++count;

  return count;
}


// Counts the set bits of `value` with Brian Kernighan's algorithm: every
// iteration clears the lowest set bit, so the loop body runs once per set bit
// and not at all when value is 0.
unsigned popcount2(unsigned value) {
  int count = 0;
   while (value) {
    value &= value - 1;  // clears the lowest set bit of value
    ++count;
  }

  return count;
}
```
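compiles to the following (the bit-test loop in `popcount` is unrolled and vectorized with AVX2, while the Kernighan loop in `popcount2` is emitted as a single `popcnt`):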
```
# Constant pool read by popcount(unsigned int) below.
# Per-lane shift counts 1..4 (vpsrlvd into xmm1).
.LCPI0_0:
        .long   1                               # 0x1
        .long   2                               # 0x2
        .long   3                               # 0x3
        .long   4                               # 0x4
# Lane indices for vpermd; loaded with vbroadcasti128 as [3,0,1,2,3,0,1,2].
.LCPI0_2:
        .long   3                               # 0x3
        .long   0                               # 0x0
        .long   1                               # 0x1
        .long   2                               # 0x2
# Per-lane shift counts 24..27 (vpsrlvd into xmm0).
.LCPI0_5:
        .long   24                              # 0x18
        .long   25                              # 0x19
        .long   26                              # 0x1a
        .long   27                              # 0x1b
# Per-lane shift counts 8..15 (vpsrlvd into ymm2).
.LCPI0_1:
        .long   8                               # 0x8
        .long   9                               # 0x9
        .long   10                              # 0xa
        .long   11                              # 0xb
        .long   12                              # 0xc
        .long   13                              # 0xd
        .long   14                              # 0xe
        .long   15                              # 0xf
# Per-lane shift counts 16..23 (vpsrlvd into ymm4).
.LCPI0_4:
        .long   16                              # 0x10
        .long   17                              # 0x11
        .long   18                              # 0x12
        .long   19                              # 0x13
        .long   20                              # 0x14
        .long   21                              # 0x15
        .long   22                              # 0x16
        .long   23                              # 0x17
# The value 1; broadcast to every lane and used as the "& 1" mask.
.LCPI0_3:
        .long   1                               # 0x1
# popcount(unsigned int): compiler output for the 32-iteration bit-test loop.
# In:  edi = value.   Out: eax = number of set bits in value.
# The loop is fully unrolled. Bits 0-27 are isolated with per-lane variable
# shifts plus an AND with 1 and summed in vector registers; bits 28-31 are
# handled with scalar shr/and/bt/adc. In the comments below, "bit N" means
# (value >> N) & 1.
popcount(unsigned int):                           # @popcount(unsigned int)
        # ymm0 = value replicated into all 8 dword lanes.
        vmovd   xmm0, edi
        vpbroadcastd    ymm0, xmm0
        # xmm1 = value >> {1,2,3,4}.
        vpsrlvd xmm1, xmm0, xmmword ptr [rip + .LCPI0_0]
        mov     eax, edi
        # ymm2 = value >> {8,...,15}.
        vpsrlvd ymm2, ymm0, ymmword ptr [rip + .LCPI0_1]
        # Scalar shifts: eax = value >> 5, ecx = value >> 6, edx = value >> 7.
        shr     eax, 5
        mov     ecx, edi
        shr     ecx, 6
        mov     edx, edi
        shr     edx, 7
        # Rotate the four shifted values so that lanes 1-4 hold value >> {1,2,3,4},
        # then put the unshifted value in lane 0. Lanes 5-7 are overwritten below.
        vbroadcasti128  ymm3, xmmword ptr [rip + .LCPI0_2] # ymm3 = [3,0,1,2,3,0,1,2]
        vpermd  ymm1, ymm3, ymm1
        vpblendd        ymm1, ymm1, ymm0, 1             # ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
        # Lane 5 = value >> 5.
        vmovd   xmm3, eax
        vpbroadcastd    ymm3, xmm3
        vpblendd        ymm1, ymm1, ymm3, 32            # ymm1 = ymm1[0,1,2,3,4],ymm3[5],ymm1[6,7]
        # Lane 6 = value >> 6.
        vmovd   xmm3, ecx
        vpbroadcastd    ymm3, xmm3
        vpblendd        ymm1, ymm1, ymm3, 64            # ymm1 = ymm1[0,1,2,3,4,5],ymm3[6],ymm1[7]
        # Lane 7 = value >> 7, so ymm1 = value >> {0,...,7}.
        vmovd   xmm3, edx
        vpbroadcastd    ymm3, xmm3
        vpblendd        ymm1, ymm1, ymm3, 128           # ymm1 = ymm1[0,1,2,3,4,5,6],ymm3[7]
        # ymm3 = mask of 1 in every lane.
        vpbroadcastd    ymm3, dword ptr [rip + .LCPI0_3] # ymm3 = [1,1,1,1,1,1,1,1]
        # ymm4 = value >> {16,...,23}.
        vpsrlvd ymm4, ymm0, ymmword ptr [rip + .LCPI0_4]
        # Isolate bits 8-15 (ymm2) and bits 0-7 (ymm1), then add them lane-wise.
        vpand   ymm2, ymm2, ymm3
        vpand   ymm1, ymm1, ymm3
        vpaddd  ymm1, ymm1, ymm2
        # ymm2 = bits 16-23, one per lane.
        vpand   ymm2, ymm4, ymm3
        # xmm0 = bits 24-27, one per lane.
        vpsrlvd xmm0, xmm0, xmmword ptr [rip + .LCPI0_5]
        vpand   xmm0, xmm0, xmm3
        # eax = bit 29.
        mov     eax, edi
        shr     eax, 29
        and     eax, 1
        # ecx = bit 31 (the shift leaves only the top bit, so no mask is needed).
        mov     ecx, edi
        shr     ecx, 31
        # Horizontal sum of ymm1: edx = number of set bits among bits 0-15.
        vextracti128    xmm3, ymm1, 1
        vpaddd  xmm1, xmm1, xmm3
        vpshufd xmm3, xmm1, 238                 # xmm3 = xmm1[2,3,2,3]
        vpaddd  xmm1, xmm1, xmm3
        vpshufd xmm3, xmm1, 85                  # xmm3 = xmm1[1,1,1,1]
        vpaddd  xmm1, xmm1, xmm3
        vmovd   edx, xmm1
        # Horizontal sum of ymm2 (bits 16-23) into esi, then add edx:
        # esi = number of set bits among bits 0-23.
        vextracti128    xmm1, ymm2, 1
        vpaddd  xmm1, xmm2, xmm1
        vpshufd xmm2, xmm1, 238                 # xmm2 = xmm1[2,3,2,3]
        vpaddd  xmm1, xmm1, xmm2
        vpshufd xmm2, xmm1, 85                  # xmm2 = xmm1[1,1,1,1]
        vpaddd  xmm1, xmm1, xmm2
        vmovd   esi, xmm1
        add     esi, edx
        # Horizontal sum of xmm0: edx = number of set bits among bits 24-27.
        vpshufd xmm1, xmm0, 238                 # xmm1 = xmm0[2,3,2,3]
        vpaddd  xmm0, xmm0, xmm1
        vpshufd xmm1, xmm0, 85                  # xmm1 = xmm0[1,1,1,1]
        vpaddd  xmm0, xmm0, xmm1
        vmovd   edx, xmm0
        # bt copies bit 28 into CF; adc folds it in: edx = count for bits 0-28.
        bt      edi, 28
        adc     edx, esi
        # bt copies bit 30 into CF; adc folds it in: eax = bit 29 + bit 30.
        bt      edi, 30
        adc     eax, 0
        # Add bit 31, then the count for bits 0-28: eax = total set bits.
        add     eax, ecx
        add     eax, edx
        # Zero the upper halves of the ymm registers before returning.
        vzeroupper
        ret
# popcount2(unsigned int): compiler output for the Kernighan loop, emitted
# here as a single popcnt instruction.
# In:  edi = value.   Out: eax = number of set bits in value.
popcount2(unsigned int):                          # @popcount2(unsigned int)
        popcnt  eax, edi
        ret
```

https://github.com/llvm/llvm-project/pull/95625


More information about the libc-commits mailing list