<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/101915">101915</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[x86] Chained `blsr`s can be optimized to a `pdep` (since Haswell on Intel, since Zen 3 on AMD)
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Validark
</td>
</tr>
</table>
<pre>
Here is how we could compile a chain of `blsr`s whose length is known at compile time:
```zig
// Number of chained "clear lowest set bit" steps; a compile-time constant in this example.
const NUM_BLSR_OPS = 8;
/// Returns `x` with its NUM_BLSR_OPS lowest set bits cleared, clearing one
/// bit per step. This is the straightforward form that lowers to a chain of
/// `blsr` instructions.
export fn chained_blsr_ops_1(x: u64) u64 {
var r = x;
// `inline for` is unrolled at compile time, yielding NUM_BLSR_OPS copies of the body.
inline for (0..NUM_BLSR_OPS) |_| {
// r & (r - 1) clears the lowest set bit. `-%` is wrapping subtraction, so
// r == 0 yields 0 & 0xFFFF...FF == 0 rather than an overflow.
r &= r -% 1;
}
return r;
}
/// Computes the same value as chained_blsr_ops_1 (x with its NUM_BLSR_OPS
/// lowest set bits cleared) using a single `pdep` instead of a chain.
export fn chained_blsr_ops_2(x: u64) u64 {
// A u64 has at most 64 set bits, so 64 or more clears always give 0. This
// guard also keeps the shift amount below strictly less than the bit width.
if (NUM_BLSR_OPS >= 64) return 0;
// The first argument has its low NUM_BLSR_OPS bits zero and every higher bit
// one. pdep hands those bits out, lowest first, to the set-bit positions of
// x: the lowest NUM_BLSR_OPS set bits of x receive 0 and the rest receive 1.
return pdep(~@as(u64, 0) << NUM_BLSR_OPS, x);
}
/// Thin wrapper that calls the LLVM intrinsic `llvm.x86.bmi.pdep.64`.
/// As used in this file, `a` supplies the bits to deposit and `b` is the mask
/// selecting where they land (this matches `pdep rax, rax, rdi` in the
/// listing below, where rdi holds `x`).
fn pdep(a: u64, b: u64) u64 {
// The intrinsic is declared by its exact LLVM name as an extern function
// inside an anonymous struct, then called through that struct.
return struct {
extern fn @"llvm.x86.bmi.pdep.64"(u64, u64) u64;
}.@"llvm.x86.bmi.pdep.64"(a, b);
}
```
```asm
; Compiler output for the unrolled loop: eight blsr instructions, one per
; NUM_BLSR_OPS step. Each blsr clears the lowest set bit of its source.
chained_blsr_ops_1: ; bad version
; First step reads the argument x from rdi.
blsr rax, rdi
; Every following step reads the previous result in rax, so the eight
; instructions form a serial dependency chain.
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
ret
; Compiler output for the pdep formulation: one constant load and one pdep.
chained_blsr_ops_2: ; good version
; -256 is ~0 << 8: the low 8 bits are clear and all higher bits are set.
mov rax, -256
; Deposit the low bits of rax onto the set-bit positions of x (rdi); the 8
; lowest set bits of x receive zeros and are thereby cleared.
pdep rax, rax, rdi
ret
```
Here is how we could compile a loop of `blsr`s whose iteration count is only known at run time:
```zig
/// Returns `x` with its `num_blsr_ops` lowest set bits cleared, clearing one
/// bit per loop iteration. The count is a runtime value here, so this is a
/// real loop rather than a compile-time unrolled one.
export fn chained_blsr_ops_1(x: u64, num_blsr_ops: u64) u64 {
var r = x;
for (0..num_blsr_ops) |_| {
// r & (r - 1) clears the lowest set bit. `-%` is wrapping subtraction, so
// once r reaches 0 it stays 0 instead of overflowing.
r &= r -% 1;
}
return r;
}
/// Computes the same value as the loop in chained_blsr_ops_1 (x with its
/// `num_blsr_ops` lowest set bits cleared) without looping: one shift, one
/// `pdep`, and a select for counts of 64 or more.
export fn chained_blsr_ops_2(x: u64, num_blsr_ops: u64) u64 {
// @truncate narrows the count to the shift-amount type, so the shift is always
// in range. For counts >= 64 the truncated shift gives the wrong mask; that
// case is replaced by 0 on the next line.
const r = pdep(~@as(u64, 0) << @truncate(num_blsr_ops), x);
// A u64 has at most 64 set bits, so 64 or more clears always leave 0.
return if (num_blsr_ops >= 64) 0 else r;
}
/// Thin wrapper that calls the LLVM intrinsic `llvm.x86.bmi.pdep.64`.
/// As used in this file, `a` supplies the bits to deposit and `b` is the mask
/// selecting where they land (this matches `pdep rcx, rax, rdi` in the
/// listing below, where rdi holds `x`).
fn pdep(a: u64, b: u64) u64 {
// The intrinsic is declared by its exact LLVM name as an extern function
// inside an anonymous struct, then called through that struct.
return struct {
extern fn @"llvm.x86.bmi.pdep.64"(u64, u64) u64;
}.@"llvm.x86.bmi.pdep.64"(a, b);
}
```
```asm
; Compiler output for the runtime-count loop: the loop is unrolled by 8, with
; a one-at-a-time remainder loop. rdi = x, rsi = num_blsr_ops, result in rax.
chained_blsr_ops_1: ; bad version
mov rax, rdi
; Zero iterations: return x unchanged.
test rsi, rsi
je .LBB0_6
; rcx = count mod 8, the iterations left over after the unrolled loop.
mov ecx, esi
and ecx, 7
; Fewer than 8 iterations: skip the unrolled loop entirely.
cmp rsi, 8
jb .LBB0_4
; Round the count down to a multiple of 8 for the unrolled loop.
and rsi, -8
; Unrolled loop: 8 serially dependent blsr per pass.
.LBB0_3:
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
blsr rax, rax
add rsi, -8
jne .LBB0_3
; Remainder: nothing left to do if count mod 8 is zero.
.LBB0_4:
test rcx, rcx
je .LBB0_6
; Remainder loop: one blsr per leftover iteration.
.LBB0_5:
blsr rax, rax
dec rcx
jne .LBB0_5
.LBB0_6:
ret
; Compiler output for the pdep formulation with a runtime count: branch-free,
; constant instruction count. rdi = x, rsi = num_blsr_ops, result in rax.
chained_blsr_ops_2: ; good version
; rax = ~0 << count (shlx uses only the low 6 bits of the count for a 64-bit shift).
mov rax, -1
shlx rax, rax, rsi
; rcx = x with its lowest `count` set bits cleared (valid while count < 64).
pdep rcx, rax, rdi
; Default result is 0; keep the pdep result only when count < 64 (unsigned).
xor eax, eax
cmp rsi, 64
cmovb rax, rcx
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzsV02P2zYQ_TX0ZbACRX1YOuiwtrtIgaQtGrSHXgxKGtlMJNIgKa-aQ397QUm2Zdmb3RQIkEONxRLmcN7MPA7fwNwYsZOIGYlWJNoseGv3Smd_8lqUXH9e5Kr8O3uHGkEY2KtneEYoVFuXUKjmIGoEDrJtctSgKshrowlbGvgs1bMEbk-nHqxokASPhG4IPf2P6fD3ReyGnUJJY-GXPz5sV-8__r799bePQIINJCRYTR2xOyhtoZJQ7LmQWG5d3K06mK1PWNKR4BHaOCQsdQuQ5egNAHDkGnSP2p1R3b6QtZAIldJAWEI9b5qFQyLL9ZYs19do7uMcYgeo4YGwCPwrXLLcXL5otK2WoC_1nKyvFsZeK0xULvEZdz-5vAaHMTa9ym7cPJR4ICz5h4SUG8KSPsYaaF93sCbBGq7pWENHWPpSGdUZkZ8zXkP-tezHRIzVbWFvOcbOopaOGBJSwlhdHxuvS2Ivb4TnYnkOl11Sv8S51EuWG-81dz5kere2U7ve7WFumrGHb1syeAQgwQpyXsIRtRFKjim5Q335vHOBdSmu657befdG-392_N52jXbK350-P7G1U2pO1_hp1BEm4A8siq8PuCudRb_H7iWXe1f7iujVSh2mkjcXtxck7s3atXayejZ-s6RdhOwK5kcTsrdXOcyGoc436BUJqdWtLLhFwpIZBzcCdipoENHp6WsRpYC1wZfr_l_5bpXvBembPeKbx2nR2N5uRG83M_snHFbv_WpFt_EMFYseFedeXJZT-_LaWjQHmMRMZhHzacTwPvDo-jD6DmeDszh8P2H9YRX_RFBZzsk5EytxQmwwZS68MDfvi-EG3fJ6XwxfohuwF5MusYBTlK_kGk3h4wv826bcN445_9ps9nUHt0Nu3vHnWVjcn4Wd0sOLGAw4v7fZm4jDuVkd80kWxe2gPwnFosyCMg1SvsDMXzKWMN_3_cU-K8siD4MwKKqCp1XMopgnVZomWPAkrbBaiIxRFtKERtRnke97QZL7aVVQLNIojIOUhBQbLmqv1zeldwthTIuZT_3UjxY1z7E2_U8cxiQ-Q291uhdtFjpzTg95uzMkpLUw1lxgrLB1_9uoS2ISbWA93CSQmPaDP6YGCi4hR1AHKxrxBUuwCrg70Y-CmLqRYoQsEN5x84x1DUrCz9Ji7RgbLH-hhMDtP37YEJYuWl1ne2v7kUjYE2FPO2H3be4VqiHsyeU3Lg8HrT5hYQl76qsyhD2NZR8z9m8AAAD__4Ohtb8">