<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/124309">124309</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
x86-64 backend refuses to put i128 in SIMD registers when only bitops are used
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
orlp
</td>
</tr>
</table>
<pre>
Consider these two Rust functions:
```rust
#[unsafe(no_mangle)]
pub fn num(x: &mut [u128; 4]) {
let a = x[0];
let b = x[1];
let c = x[2];
x[3] = (a & b) | (!a & c);
}
#[unsafe(no_mangle)]
pub fn autovec(x: &mut [[u8; 16]; 4]) {
let a = x[0];
let b = x[1];
let c = x[2];
x[3] = std::array::from_fn(|i| (a[i] & b[i]) | (!a[i] & c[i]));
}
```
They compile to this LLVM IR:
```llvm
define void @num(ptr noalias nocapture noundef align 16 dereferenceable(64) %x) unnamed_addr {
start:
%a = load i128, ptr %x, align 16
%0 = getelementptr inbounds i8, ptr %x, i64 16
%b = load i128, ptr %0, align 16
%1 = getelementptr inbounds i8, ptr %x, i64 32
%c = load i128, ptr %1, align 16
%_5 = and i128 %b, %a
%_7 = xor i128 %a, -1
%_6 = and i128 %c, %_7
%2 = getelementptr inbounds i8, ptr %x, i64 48
%3 = or disjoint i128 %_6, %_5
store i128 %3, ptr %2, align 16
ret void
}
define void @autovec(ptr noalias nocapture noundef align 1 dereferenceable(64) %x) unnamed_addr personality ptr @rust_eh_personality {
start:
%0 = getelementptr inbounds i8, ptr %x, i64 16
%1 = getelementptr inbounds i8, ptr %x, i64 32
%2 = getelementptr inbounds i8, ptr %x, i64 48
%3 = load &lt;16 x i8&gt;, ptr %x, align 1
%4 = load &lt;16 x i8&gt;, ptr %0, align 1
%5 = load &lt;16 x i8&gt;, ptr %1, align 1
%6 = and &lt;16 x i8&gt; %4, %3
%7 = xor &lt;16 x i8&gt; %3, &lt;i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1&gt;
%8 = and &lt;16 x i8&gt; %5, %7
%9 = or disjoint &lt;16 x i8&gt; %8, %6
store &lt;16 x i8&gt; %9, ptr %2, align 1
ret void
}
```
So far, so good. However, we get this x86-64-v4 output assembly:
```asm
num:
mov rax, qword ptr [rdi]
mov rcx, qword ptr [rdi + 8]
mov rdx, qword ptr [rdi + 24]
and rdx, rcx
mov rsi, qword ptr [rdi + 16]
and rsi, rax
andn rcx, rcx, qword ptr [rdi + 40]
or rcx, rdx
andn rax, rax, qword ptr [rdi + 32]
or rax, rsi
mov qword ptr [rdi + 56], rcx
mov qword ptr [rdi + 48], rax
ret
autovec:
vmovdqu xmm0, xmmword ptr [rdi]
vmovdqu xmm1, xmmword ptr [rdi + 16]
vpternlogq xmm1, xmm0, xmmword ptr [rdi + 32], 226
vmovdqu xmmword ptr [rdi + 48], xmm1
ret
```
I don't understand why the two are not identical, i.e. why the code generated for `autovec` is so much better than the code generated for `num`.
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzMV0mP2zgT_TX0pdANidTmgw9tdxpfgOSSAN_VoMSyzYxEOiTl5d8PSHptS1lmBpgxDNhSvVcbX4kUt1auFeKM5HOSv0547zbazLRpt5Nai-NsoZWVAg24DVoEt9fwpbcOVr1qnNTKEvZCkvAtkvg1vXX-mjKSz3tl-QoJrZRedlytWyR0SvJXkrxs-xpWClTfEVodCHsBQouud-BpKa0Im0PmoXQKpJyT5AUAoEUHHAh7hQPJ54m3s1tbfbGlD7bmYqN3Nn-Hkfw1mAmtuM8E6hh44e8QmsabjU8_EEn5eir8FwvlvdM7bB6L9exQbVrEtP6Nsq0TfinZCzeGH-PfldHdcqV8_eVCnlrBST6XgRV6FC_eteoW0lwh73t3Vkzs40cHje62skULToPbSAufPv3_M3z88iiytt11JHkRuJIKYaelAJIlUUtbZ0Bp3kpuQemGb11vEJTulcAV8FauFaQFCDS4QoOqQV775aqKLNRB84P_7ZXiHYolF8KcVsI6blxMBjwurkiruQDpFUsX4INHD4tLqDM8CfA1OmyxQ-U8VqraJ2ZBPtBlkZ3IhOb1aKhkKFT6-6EYPZOb0VjpUKxlHvBcRXjI1uN8gy6YMopQmwuGe8xTekEUD16ak5dleQbR3y8rq85kFsjagJD2m5bKXQIti3OkPICt0wYvVnbjlj40wKAL-rt9JLyT5XXyf0mav6fMLRqrFW-lO8Yks_AEXuJmeWsaFvBfVuTf1tg_sJJBoIQt0gIOnsI-jAzgmZb9nJYM0PKf09IB2lXPd6yQyElu7Iy9TscDNqiPsIWs_LD4VvwH_rAP58yr8SrzU5WX6Z0-DOADpzpxgsjiGD5gpiPzODKO97vMVw0rbsBqWGstnuF_eo87NLBHL8a46xyq4qnInnYZ6N5tewfcWuzq9vi4D3HrtyG_75yGKn46vQPDgwa_77URMd18boSMZ4JboP-YZhAMhM6heqQYMQ6n2T3er00IETk-0kB8K0cdxmPJgMPI8XXeGdVNQT-qK0vu_WoDt73w-Q74jV0daW7wy-iI3xPVyoEGDHrKQ-WjTRuuqjpzYl8MuqiZ8y5wJ5Rdp3fiew-HrguPnkPX_VAvN_h0GD-4ZLutQ6Navf4er2_oI2FvOkkXQGkxnMQPOxCiXFvw7rgHQitCSwd-8zPWeVntN0f_pnEktAz7ogMpUDnZ8Nafi0_m81EaGi0QpPXT3PXNBmp0Ds3zRMyYmLIpn-AsLVlZsjIvqslmhlXFGC0qXuUlT5MkowUTWcNwShNWpjiRM5rQPElplhY5zcrnbMrrdJokvK6qumQVyRLsuGyf_QH0WZv1RFrb4yylGUumk5bX2NrwNkWpwj0EK6G-jxMz86Snul9bkiWttM5e3TjpWpzFJw_UvPkDlQCDq97G47B_DIUDiVTw9ePnVzC4ltahsbDfoAKt2iPU0umtBW4Qeoti0pt2tnFuG97S6Buhb2vpNn393OiO0LdwiI4_T1ujv2HjCH0LKVtC30417Wb0zwAAAP__9yHkig">