<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/129218>129218</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            codegen for the popcount-like loop is bad
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          DenisYaroshevskiy
      </td>
    </tr>
</table>

<pre>
    On [Twitter](https://x.com/lemire/status/1895297881592209657), people complained about the codegen for this loop:

```
#include <cstdint>

std::uint32_t count(std::uint64_t bits) {
    std::uint32_t used = 0;
    for (std::uint32_t i = 0; i < 64; ++i) {
        if (bits & (1ull << i)) {
 ++used;
        }
    }
    return used;
}
```
https://godbolt.org/z/vo4MK4hGq

It indeed looks questionable:

```
count(unsigned long):
        vpbroadcastd    xmm0, edi
        vpbroadcastq    ymm1, rdi
        vpmovqd xmm2, ymm1
 vpsrlvd xmm3, xmm0, xmmword ptr [rip + .LCPI0_0]
        vinserti128 ymm4, ymm2, xmm2, 1
        mov     eax, edi
        shr     eax, 30
 mov     ecx, edi
        vpsrlvq ymm5, ymm1, ymmword ptr [rip + .LCPI0_1]
        vpsrlvq ymm6, ymm1, ymmword ptr [rip + .LCPI0_2]
 vpsrlvq ymm7, ymm1, ymmword ptr [rip + .LCPI0_3]
        vpsrlvq ymm8, ymm1, ymmword ptr [rip + .LCPI0_4]
        vpsrlvq ymm9, ymm1, ymmword ptr [rip + .LCPI0_5]
        shr     ecx, 31
        mov     rdx, rdi
        vpsrlvq ymm10, ymm1, ymmword ptr [rip + .LCPI0_6]
        vpackusdw       ymm9, ymm10, ymm9
        vpsrlvd xmm10, xmm2, xmmword ptr [rip + .LCPI0_7]
 vpsrlvd ymm11, ymm4, ymmword ptr [rip + .LCPI0_8]
        vpsrlvd ymm4, ymm4, ymmword ptr [rip + .LCPI0_9]
        vpsrlvd xmm2, xmm2, xmmword ptr [rip + .LCPI0_10]
        shr     rdx, 32
        vpmovqd xmm8, ymm8
 vpmovqd xmm7, ymm7
        vinserti128     ymm7, ymm8, xmm7, 1
 vpmovqd xmm6, ymm6
        vpmovqd xmm5, ymm5
        vinserti128     ymm5, ymm6, xmm5, 1
        vpermq  ymm6, ymm9, 216
        vpsrld  xmm0, xmm0, 1
        vpinsrd xmm0, xmm0, edi, 1
        vpunpcklqdq     xmm0, xmm0, xmm3
        vpbroadcastd    ymm8, eax
        vpmovsxbq       ymm9, dword ptr [rip + .LCPI0_14]
        vpermi2q        ymm9, ymm3, ymm8
 vpbroadcastd    ymm3, edx
        vpblendd        ymm3, ymm9, ymm3, 128
 vinserti128     ymm8, ymm0, xmm10, 1
        vpblendd        ymm2, ymm3, ymm2, 15
        vpbroadcastd    ymm3, dword ptr [rip + .LCPI0_12]
 vpand   ymm2, ymm2, ymm3
        vpand   ymm5, ymm5, ymm3
        vpand ymm6, ymm6, ymm3
        vpand   ymm4, ymm4, ymm3
        vpaddd  ymm4, ymm4, ymm6
        vpand   ymm6, ymm7, ymm3
        vpand   ymm7, ymm11, ymm3
        vpaddd  ymm6, ymm7, ymm6
        vpaddd  ymm2, ymm6, ymm2
 vpand   ymm6, ymm8, ymm3
        vpsrlvq ymm1, ymm1, ymmword ptr [rip + .LCPI0_13]
        vpmovqd xmm1, ymm1
        vpand   xmm1, xmm1, xmm3
 mov     rax, rdi
        shr     rax, 63
        vpand   xmm0, xmm0, xmm3
        vpaddd  xmm0, xmm0, xmm1
        vpblendd        ymm0, ymm6, ymm0, 15
        vpaddd  ymm0, ymm0, ymm5
        vpaddd  ymm0, ymm0, ymm4
        vpaddd  ymm0, ymm0, ymm2
        vextracti128    xmm1, ymm0, 1
        vpaddd  xmm0, xmm0, xmm1
        vpextrd edx, xmm0, 1
 vmovd   esi, xmm0
        add     esi, edx
        vpextrd edx, xmm0, 2
 vpextrd r8d, xmm0, 3
        add     r8d, edx
        add     r8d, esi
        bt      rdi, 62
        adc     eax, 0
        bt      rdi, 61
        adc     eax, r8d
        add     eax, ecx
        vzeroupper
 ret
```
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJyUV8tu6zYTfhp6Q5yAInVdaOEkv38ctEW77SqgRMZmI4kySSnOefqCupm62HENA7rMzDcz3wyHItVaHCvOUxA8g-B1Rxtzkip95ZXQf1Ml9Ym3-kN87TLJvtI_KwiCZ_MpjOEKBK8Axydjag3IHuADwIfLUy5LgA8FL4XiAB-0oabRAB-8OAlwEsWxFyQYoyQMIoATWHNZFxzmsqwLKirOIM1kY6A52ZeMH3kF36WC5iQ0LKSsAdrbf4iGP9oDTESVFw3jEJCXXBsmKgPI_3pNbZiNjuwbURmC3wzMZVMZgOOZJPTfDMyE0TYoED0DtIcQwrV1ozmDgLxCBMioZQNcAnbKYtLsbl9g6Nt7gJ8BfhZzV_Yn3i2ODQMCHNp7rykKa2mNrYFj06PYeK6R2B-IXoen653iplEVvCr3IpfGeSGPkmWyME9SHQE-_AL40Er_j9_80__PPa8_DRQV45zZqnxoeG64NkJWNCv4ukYj503VtZs1qo42HbJ3Im_rTEnKcqoNs8-XskQAv0DOxLbW2T5_laVntdRCq5TtmVkMbKWdFtrDttaqaLv3xL4ffVzK8lMqBmujbI8rUVuC4dPvL3_9RG_INrsDLirNlREeji2wPzjAA1B39Rz9UrbdldPLOh99Uq6QWL6uFvmGRZ_D2boMptz6650cvEUOV5DwcRA8gDjW0ePW5GYI8eMg_k2Q5HGQYA4y1aCnm2wVT7HLVptN3j30uPtwmQPNPxrNPodnN5cRNVl57ZrYQ27T3e_iaFY81sGPwfrfBx1vEc9c4wdAkm2Qy2L13E_EQ9vlG0pE8PYgGJss7kmY3o8dHN1Y4kNJosm8DzCalrmDNa6lcDuEcb0G910FE0rvKlhOlLbmqjxDd_V2HYO9cMkug-6QQ2skUWnFVjp25KxUm6rOP4oz6wbvyqSbqbfH-UidHXQLcvQlOy96n92r_2oGcFUKPEK464fMa76KiPS5ziPKCl4x5qARl-Px2cM96Lp8Y6ONrHgbrK984EXE_S4S3CeUfEvVdWLTis09XT3ORtGodu3VW2pfbr_fxVpOiIUas0xsqIXbaOG0YO86nXYm777XJVy4rYYXqeIFraE7H1bervvEf9iyVxvmNEg896Nmkfoodq7E_ahQdGMnm2ZoLwy3OP12wfdcbah90_toQS3a6P2pDshVWs7S22r-Y2qzvYNfjKL5tLYd3jdW9IPJW0zWjZzlRG5L2VpWuBaT7GpJB8YG6XJmbcIOLdrLVMxcGdnAHlTm2EuhdtsmM_1V9dtFiGeGeR9x31Lojpl328y63WBh-JTOZyz84ko2dc2Vfam4cY8hO5YSlpCE7njqRT6KSIj8YHdKGWIhRl4cvgfvie-j_D2mNAhCQoOE-0GyEylGOEAYxyjxIxw8-fG7T4M8JkGcMRYz4CNeUlE8FUVb2jPTTmjd8NTDCfbiXUEzXujuiI1xxT9hJwXYjuadSq3Rj6w5auCjQmijrzBGmIKn84Mwh7WsuwMVLMQH707FUGiYUbZrVJEuDnLCnJpsPJYX7Xj5USv5D88NwIcumu6M3ofbpvjfAAAA__-MxIc8">