<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/101915">101915</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [x86] Chained `blsr`s can be optimized to a `pdep` (since Haswell on Intel, since Zen 3 on AMD)
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          Validark
      </td>
    </tr>
</table>

<pre>
    Here is how we could compile a chain of `blsr`s when the length of the chain is known at compile time:

```zig
// Compile-time count of chained lowest-set-bit clears used by this snippet.
const NUM_BLSR_OPS = 8;

/// Clears the lowest set bit of `x` NUM_BLSR_OPS times and returns what is left.
/// The count is a compile-time constant, so the `inline for` is unrolled.
export fn chained_blsr_ops_1(x: u64) u64 {
    var r = x;
    inline for (0..NUM_BLSR_OPS) |_| {
        // r AND (r - 1) drops the lowest set bit. `-%` is wrapping subtraction,
        // so r == 0 simply stays 0.
        r &= r -% 1;
    }
    return r;
}

/// Single-`pdep` formulation of chained_blsr_ops_1: same input, same intended result.
export fn chained_blsr_ops_2(x: u64) u64 {
    // 64 or more clears always empty a u64. Answering that case here also keeps
    // the shift below from ever being evaluated with an out-of-range amount.
    if (NUM_BLSR_OPS >= 64) return 0;
    // First operand: all ones with the low NUM_BLSR_OPS bits cleared. With `x` as
    // the deposit mask, PDEP puts those low zero bits on the lowest NUM_BLSR_OPS
    // set bits of `x` and ones on every remaining set bit of `x`.
    return pdep(~@as(u64, 0) << NUM_BLSR_OPS, x);
}

/// Forwards `a` and `b`, in that order, to the 64-bit LLVM x86 BMI `pdep` intrinsic
/// and returns its result.
fn pdep(a: u64, b: u64) u64 {
    // The intrinsic is declared inside a local namespace struct so that its
    // quoted name stays private to this wrapper.
    const intrinsics = struct {
        extern fn @"llvm.x86.bmi.pdep.64"(u64, u64) u64;
    };
    return intrinsics.@"llvm.x86.bmi.pdep.64"(a, b);
}
```

```asm
; Eight blsr instructions in a serial dependency chain: each one reads the rax
; written by the one before it (the first reads the argument in rdi).
chained_blsr_ops_1:  ; bad version
 blsr    rax, rdi
        blsr    rax, rax
        blsr    rax, rax
 blsr    rax, rax
        blsr    rax, rax
        blsr    rax, rax
        blsr    rax, rax
        blsr    rax, rax
 ret

; Two instructions in place of the eight-blsr chain.
chained_blsr_ops_2:  ; good version
        ; -256 is all ones shifted left by 8, i.e. the low 8 bits are clear.
        mov     rax, -256
        ; Deposit that pattern into the set-bit positions of rdi (the argument x).
        pdep    rax, rax, rdi
        ret
```

Here is how we could compile a loop of `blsr`s when the iteration count is only known at run time:


```zig
/// Clears the lowest set bit of `x` `num_blsr_ops` times, where the count is a
/// run-time value, and returns what is left.
export fn chained_blsr_ops_1(x: u64, num_blsr_ops: u64) u64 {
    var r = x;
 for (0..num_blsr_ops) |_| {
        // r AND (r - 1) drops the lowest set bit. `-%` is wrapping subtraction,
        // so r == 0 simply stays 0.
        r &= r -% 1;
    }
    return r;
}

/// `pdep` formulation of the run-time-count chained_blsr_ops_1: the deposit is
/// computed unconditionally and the result is selected afterwards.
export fn chained_blsr_ops_2(x: u64, num_blsr_ops: u64) u64 {
    // `@truncate` narrows the count to the shift-amount type so the shift is in
    // range for every input. The value computed for counts >= 64 is not
    // meaningful and is discarded by the select below.
    const r = pdep(~@as(u64, 0) << @truncate(num_blsr_ops), x);
    // 64 or more clears always leave 0 in a u64.
 return if (num_blsr_ops >= 64) 0 else r;
}

/// Forwards `a` and `b`, in that order, to the 64-bit LLVM x86 BMI `pdep` intrinsic
/// and returns its result.
fn pdep(a: u64, b: u64) u64 {
    // The intrinsic is declared inside a local namespace struct so that its
    // quoted name stays private to this wrapper.
    const intrinsics = struct {
        extern fn @"llvm.x86.bmi.pdep.64"(u64, u64) u64;
    };
    return intrinsics.@"llvm.x86.bmi.pdep.64"(a, b);
}
```

```asm
; Run-time count: a main loop unrolled by 8 (.LBB0_3) followed by a
; one-blsr-per-iteration remainder loop (.LBB0_5). In this listing rdi carries x
; and rsi carries num_blsr_ops.
chained_blsr_ops_1:   ; bad version
 mov     rax, rdi
        ; Count of zero: return x unchanged.
        test    rsi, rsi
        je      .LBB0_6
        ; rcx = count mod 8, the iterations left over for the remainder loop.
 mov     ecx, esi
        and     ecx, 7
        ; Fewer than 8 iterations: skip the unrolled loop entirely.
        cmp     rsi, 8
        jb      .LBB0_4
        ; Round the count down to a multiple of 8 for the unrolled loop.
        and     rsi, -8
.LBB0_3:
 blsr    rax, rax
        blsr    rax, rax
        blsr    rax, rax
 blsr    rax, rax
        blsr    rax, rax
        blsr    rax, rax
        blsr    rax, rax
        blsr    rax, rax
        add rsi, -8
        jne     .LBB0_3
.LBB0_4:
        ; No leftover iterations: done.
        test    rcx, rcx
        je      .LBB0_6
.LBB0_5:
        blsr    rax, rax
 dec     rcx
        jne     .LBB0_5
.LBB0_6:
 ret

; Straight-line code: no loop and no branch.
chained_blsr_ops_2:   ; good version
        ; rax = all ones shifted left by the count in rsi.
        mov     rax, -1
        shlx    rax, rax, rsi
        ; rcx = that pattern deposited into the set-bit positions of rdi (x).
        pdep    rcx, rax, rdi
        ; rax = 0 is the answer when the count is 64 or more.
 xor     eax, eax
        ; Unsigned count below 64: take the pdep result instead.
        cmp     rsi, 64
        cmovb   rax, rcx
 ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzsV02P2zYQ_TX0ZbACRX1YOuiwtrtIgaQtGrSHXgxKGtlMJNIgKa-aQ397QUm2Zdmb3RQIkEONxRLmcN7MPA7fwNwYsZOIGYlWJNoseGv3Smd_8lqUXH9e5Kr8O3uHGkEY2KtneEYoVFuXUKjmIGoEDrJtctSgKshrowlbGvgs1bMEbk-nHqxokASPhG4IPf2P6fD3ReyGnUJJY-GXPz5sV-8__r799bePQIINJCRYTR2xOyhtoZJQ7LmQWG5d3K06mK1PWNKR4BHaOCQsdQuQ5egNAHDkGnSP2p1R3b6QtZAIldJAWEI9b5qFQyLL9ZYs19do7uMcYgeo4YGwCPwrXLLcXL5otK2WoC_1nKyvFsZeK0xULvEZdz-5vAaHMTa9ym7cPJR4ICz5h4SUG8KSPsYaaF93sCbBGq7pWENHWPpSGdUZkZ8zXkP-tezHRIzVbWFvOcbOopaOGBJSwlhdHxuvS2Ivb4TnYnkOl11Sv8S51EuWG-81dz5kere2U7ve7WFumrGHb1syeAQgwQpyXsIRtRFKjim5Q335vHOBdSmu657befdG-392_N52jXbK350-P7G1U2pO1_hp1BEm4A8siq8PuCudRb_H7iWXe1f7iujVSh2mkjcXtxck7s3atXayejZ-s6RdhOwK5kcTsrdXOcyGoc436BUJqdWtLLhFwpIZBzcCdipoENHp6WsRpYC1wZfr_l_5bpXvBembPeKbx2nR2N5uRG83M_snHFbv_WpFt_EMFYseFedeXJZT-_LaWjQHmMRMZhHzacTwPvDo-jD6DmeDszh8P2H9YRX_RFBZzsk5EytxQmwwZS68MDfvi-EG3fJ6XwxfohuwF5MusYBTlK_kGk3h4wv826bcN445_9ps9nUHt0Nu3vHnWVjcn4Wd0sOLGAw4v7fZm4jDuVkd80kWxe2gPwnFosyCMg1SvsDMXzKWMN_3_cU-K8siD4MwKKqCp1XMopgnVZomWPAkrbBaiIxRFtKERtRnke97QZL7aVVQLNIojIOUhBQbLmqv1zeldwthTIuZT_3UjxY1z7E2_U8cxiQ-Q291uhdtFjpzTg95uzMkpLUw1lxgrLB1_9uoS2ISbWA93CSQmPaDP6YGCi4hR1AHKxrxBUuwCrg70Y-CmLqRYoQsEN5x84x1DUrCz9Ji7RgbLH-hhMDtP37YEJYuWl1ne2v7kUjYE2FPO2H3be4VqiHsyeU3Lg8HrT5hYQl76qsyhD2NZR8z9m8AAAD__4Ohtb8">