<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/149298>149298</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
clang missed vectorization optimization
</td>
</tr>
<tr>
<th>Labels</th>
<td>
clang
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
rockeet
</td>
</tr>
</table>
<pre>
```c++
#include <string.h>
#include <utility>
// Number of elements in a true array `a` (not a pointer). The argument and the
// whole expansion are parenthesized so the macro composes safely inside larger
// expressions such as `n / extent(a)`.
#define extent(a) (sizeof(a)/sizeof((a)[0]))
// Reproducer payload: an array of 12 ints whose enclosing struct is declared
// with 16-byte alignment via alignas(16).
struct alignas(16) B {
int a[12];
};
// Swap the contents of *x and *y through a local temporary, using three
// whole-object memcpy calls of sizeof(B) bytes each.
void B_swap1(B* x, B* y) {
B t;
memcpy(&t, x, sizeof(B)); // t  <- *x
memcpy( x, y, sizeof(B)); // *x <- *y
memcpy( y, &t, sizeof(B)); // *y <- t (the original *x)
}
// Swap the contents of *x and *y one element at a time with std::swap.
// The loop bound extent(x->a) is the element count of the array member a.
// This is the variant for which the report shows scalar code from clang.
void B_swap2(B* x, B* y) {
for (int i = 0; i < extent(x->a); i++) {
std::swap(x->a[i], y->a[i]);
}
}
```
### clang generates (-O3)
```nasm
B_swap1(B*, B*):
movaps xmm0, xmmword ptr [rdi]
movaps xmm1, xmmword ptr [rdi + 16]
movaps xmm2, xmmword ptr [rdi + 32]
movaps xmmword ptr [rsp - 24], xmm2
movaps xmmword ptr [rsp - 40], xmm1
movaps xmmword ptr [rsp - 56], xmm0
movaps xmm0, xmmword ptr [rsi]
movaps xmm1, xmmword ptr [rsi + 16]
movaps xmm2, xmmword ptr [rsi + 32]
movaps xmmword ptr [rdi + 32], xmm2
movaps xmmword ptr [rdi + 16], xmm1
movaps xmmword ptr [rdi], xmm0
movaps xmm0, xmmword ptr [rsp - 56]
movaps xmm1, xmmword ptr [rsp - 40]
movaps xmm2, xmmword ptr [rsp - 24]
movaps xmmword ptr [rsi + 32], xmm2
movaps xmmword ptr [rsi + 16], xmm1
movaps xmmword ptr [rsi], xmm0
ret
B_swap2(B*, B*):
mov eax, dword ptr [rdi]
mov ecx, dword ptr [rsi]
mov dword ptr [rdi], ecx
mov dword ptr [rsi], eax
mov eax, dword ptr [rdi + 4]
mov ecx, dword ptr [rsi + 4]
mov dword ptr [rdi + 4], ecx
mov dword ptr [rsi + 4], eax
mov eax, dword ptr [rdi + 8]
mov ecx, dword ptr [rsi + 8]
mov dword ptr [rdi + 8], ecx
mov dword ptr [rsi + 8], eax
mov eax, dword ptr [rdi + 12]
mov ecx, dword ptr [rsi + 12]
mov dword ptr [rdi + 12], ecx
mov dword ptr [rsi + 12], eax
mov eax, dword ptr [rdi + 16]
mov ecx, dword ptr [rsi + 16]
mov dword ptr [rdi + 16], ecx
mov dword ptr [rsi + 16], eax
mov eax, dword ptr [rdi + 20]
mov ecx, dword ptr [rsi + 20]
mov dword ptr [rdi + 20], ecx
mov dword ptr [rsi + 20], eax
mov eax, dword ptr [rdi + 24]
mov ecx, dword ptr [rsi + 24]
mov dword ptr [rdi + 24], ecx
mov dword ptr [rsi + 24], eax
mov eax, dword ptr [rdi + 28]
mov ecx, dword ptr [rsi + 28]
mov dword ptr [rdi + 28], ecx
mov dword ptr [rsi + 28], eax
mov eax, dword ptr [rdi + 32]
mov ecx, dword ptr [rsi + 32]
mov dword ptr [rdi + 32], ecx
mov dword ptr [rsi + 32], eax
mov eax, dword ptr [rdi + 36]
mov ecx, dword ptr [rsi + 36]
mov dword ptr [rdi + 36], ecx
mov dword ptr [rsi + 36], eax
mov eax, dword ptr [rdi + 40]
mov ecx, dword ptr [rsi + 40]
mov dword ptr [rdi + 40], ecx
mov dword ptr [rsi + 40], eax
mov eax, dword ptr [rdi + 44]
mov ecx, dword ptr [rsi + 44]
mov dword ptr [rdi + 44], ecx
mov dword ptr [rsi + 44], eax
```
### g++ generates (-O3)
```nasm
B_swap1(B*, B*):
movdqu xmm3, XMMWORD PTR [rsi]
movdqu xmm2, XMMWORD PTR [rdi]
movdqu xmm1, XMMWORD PTR [rdi+16]
movdqu xmm0, XMMWORD PTR [rdi+32]
movups XMMWORD PTR [rdi], xmm3
movdqu xmm3, XMMWORD PTR [rsi+16]
movups XMMWORD PTR [rdi+16], xmm3
movdqu xmm3, XMMWORD PTR [rsi+32]
movups XMMWORD PTR [rdi+32], xmm3
movups XMMWORD PTR [rsi], xmm2
movups XMMWORD PTR [rsi+16], xmm1
movups XMMWORD PTR [rsi+32], xmm0
ret
B_swap2(B*, B*):
movdqa xmm0, XMMWORD PTR [rdi]
movdqa xmm1, XMMWORD PTR [rsi]
movaps XMMWORD PTR [rdi], xmm1
movdqa xmm1, XMMWORD PTR [rsi+16]
movaps XMMWORD PTR [rsi], xmm0
movdqa xmm0, XMMWORD PTR [rdi+16]
movaps XMMWORD PTR [rdi+16], xmm1
movdqa xmm1, XMMWORD PTR [rsi+32]
movaps XMMWORD PTR [rsi+16], xmm0
movdqa xmm0, XMMWORD PTR [rdi+32]
movaps XMMWORD PTR [rdi+32], xmm1
movaps XMMWORD PTR [rsi+32], xmm0
ret
```
Although g++ did not take advantage of the alignas(16) alignment for the memcpy calls in B_swap1 (it emits unaligned movdqu/movups there), its output is still far better than clang's.
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJysWFuPozgT_TXOS6lbUIZcHngI6cnbaD6NPmn3beWAQ7zLJYNNT7p__cqQNCSxHYhWijrQOaeqTl2MDZNSZCXnEQljEr7NWKMOVR3VVfIP52q2q9KPiMy97pMQjPXHWxOkokzyJuVA6EaqWpTZ64HQb3e_NUrkQn18_ZTyvSg58JPipSK4ZARXIMUnr_bnO4LbwX0YeyR80__21lLVTaKA5SIrmSS49OeaHQNZ6KAAAESpQJN81Czaxro4X7xXIoX4L_mbHX2Cy5jgGk4EN9BefWhTvaEYVMfSNwUvkuMHwSXBudKMjvcVZtyGvTIQOqA2Pg7eAi9ezASt50oMPhSzr2oguNTJEUDoG3iExu3lpq_E6YXQb6xzA-Jc6t6MVCmha0LX2mUPD2PR1mcDH9f_GMjrIj7_vTRT1w7dB5KclRlkvOQ1U1zqYF9-0K7qX4ySyYJ46-sKXiS3Dtdnh21Oq3d2lACnovA06FQUv6s6haOqgYRxnbZheush0DcDgWAM_vyMv7ePDhpFK-0KLo_wAhicc9kavSMZOYHXc_yxjsJ5T_JukmDMlhRWGcakyeeSJqcl7SrH1rS5KzrInB1_k62RLTZI9YTM9UWdkLe-e0Z0gCVrD_B36Xroxpy1mis91ZdBxhGD3H5z1q5vqXWKO1RiQBm6t_022cJNa2Ng0mRLo9hpfJRtBoNxsV5hH0TcY_u4nRR5Q5ksYmkOzKXFQrGbn6ZladXi0uHfrzEPhXxxnBJ8nK6h50wuiGGNfSzEwnE4MOpxGH9KC3oTRgTvF0m7CPSmF6XnTBdimV-nHgPHYXyilpuJHyXiiVm3cRwOJgqxj_sjPYZthVOLCW_VQnHc0-MGPF3EE-Nu4zgcTCsKtY-884FomWDng_F2iXBYnvhMfH7cgyfG3cZxOBi_BgfX43516hoevLLunPefH73SX027I6Qa9Of373_8-PkG__v_z-vt2ACIJmB6v2-74H0LHmPDs-1C8qwkw6g3elNrjqjb1NJpko2RaScOFU86ooN9ik3GBWV0YSQNN_Q4Dn-twx9LGkZ2f3QYf25IfzFn3e_7hNmby3YEdrWIP82-sUWYoUVsZ6sRmgc-HAqchXssw3aAf9gik8WM9XTX8b4rAyO6cLikrnN1qJrs8LWcdi_0IBUplJWCmidVVopPDurAL-8vQZT928gNCAVCwp7VUFQ1hx1XitegDqzs3o69ztKIpiu6YjMe-YsQF2EYBuHsELHlintLhhjME2-39-bpnK4WYTr35r6_D4KZiNDD0Fv4Cx8pYvi6X6VsFfKAMpomu-WeBB4vmMhf8_y9eK3qbCakbHjkBytcLWc52_Fctq-IEdtgCOrczOpIE152TSZJ4OVCKtmbUELlPOre7BVCSp7CO09UVYtPpkRVQnVU58tZU-fRQamj1COMW4LbTKhDs3tNqoLgVts8f70c6-pvniiC2zZGSXB7DvM9wn8DAAD__27oQlE">