<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/148422>148422</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
RVV intrinsics: suboptimal register allocation
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
camel-cdr
</td>
</tr>
</table>
<pre>
While it does a lot better than GCC, LLVM currently produces a fair number of redundant moves for the following RVV function: https://godbolt.org/z/hnYKedzM9 (43 vs 32 instructions)
```c
/*
 * Reproducer: rearranges the eight LMUL=1 u16 register parts of `v` with
 * three rounds of masked vslide1up / vslide1down, working on 64-bit, then
 * 32-bit, then 16-bit elements (u64m4, u32m2, u16m1 views of the data).
 *
 * v: input u16 vector at LMUL=8.
 * Returns: a u16m8 vector assembled from the eight u16m1 parts produced by
 * the last round.
 *
 * NOTE(review): the name suggests an 8x8 transpose; the exact element layout
 * produced is not verifiable from this snippet alone -- confirm.
 * NOTE(review): the comments below assume the usual _mu (mask-undisturbed)
 * argument order of (mask, passthrough, source, scalar, vl) -- confirm
 * against the RVV intrinsics reference.
 */
vuint16m8_t
trans8x8_vslide(vuint16m8_t v)
{
// VL is computed once for e64/m4 and passed unchanged to every slide below,
// including the ones operating on u32m2 and u16m1 values.
size_t VL = __riscv_vsetvlmax_e64m4();
// Mask built by filling a u8m1 vector with 0b10101010 (bits 1, 3, 5, 7 of
// each byte set) and reinterpreting it as a vbool16_t.
vbool16_t modd = __riscv_vreinterpret_b16(
__riscv_vmv_v_x_u8m1(0b10101010, __riscv_vsetvlmax_e8m1()));
// Complement of modd.
vbool16_t meven = __riscv_vmnot(modd, VL);
// m holds whichever of the two masks the next group of slides uses; the same
// vbool16_t mask type is used in all three rounds.
vbool16_t m;
// Round 1 operands: the two u16m4 halves of v, viewed as 64-bit elements.
vuint64m4_t v4l = __riscv_vreinterpret_u64m4(__riscv_vget_u16m4(v, 0));
vuint64m4_t v4h = __riscv_vreinterpret_u64m4(__riscv_vget_u16m4(v, 1));
// Keep the original low half: v4l is overwritten before v4h is computed.
vuint64m4_t v4lt = v4l;
m = modd;
// Under modd: v4h slid up by one with scalar 0, merged into v4l.
v4l = __riscv_vslide1up_mu(m, v4l, v4h, 0, VL);
m = meven;
// Under meven: the saved original v4l slid down by one with scalar 0,
// merged into v4h.
v4h = __riscv_vslide1down_mu(m, v4h, v4lt, 0, VL);
// Round 2 operands: each m4 result split into two m2 halves, viewed as
// 32-bit elements.
vuint32m2_t v2ll = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4l, 0));
vuint32m2_t v2lh = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4l, 1));
vuint32m2_t v2hl = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4h, 0));
vuint32m2_t v2hh = __riscv_vreinterpret_u32m2(__riscv_vget_u64m2(v4h, 1));
// Despite their names, these save the original v2lh and v2hh, which are
// overwritten by the slide1down calls and then needed as the source of the
// slide1up calls below.
vuint32m2_t v2llt = v2lh, v2hlt = v2hh;
// m is still meven here (assigned before the round 1 slide1down).
v2lh = __riscv_vslide1down_mu(m, v2lh, v2ll, 0, VL);
v2hh = __riscv_vslide1down_mu(m, v2hh, v2hl, 0, VL);
m = modd;
v2ll = __riscv_vslide1up_mu(m, v2ll, v2llt, 0, VL);
v2hl = __riscv_vslide1up_mu(m, v2hl, v2hlt, 0, VL);
// Round 3 operands: each m2 result split into two m1 halves, viewed as
// 16-bit elements.
vuint16m1_t v1lll = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2ll, 0));
vuint16m1_t v1llh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2ll, 1));
vuint16m1_t v1lhl = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2lh, 0));
vuint16m1_t v1lhh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2lh, 1));
vuint16m1_t v1hll = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hl, 0));
vuint16m1_t v1hlh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hl, 1));
vuint16m1_t v1hhl = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hh, 0));
vuint16m1_t v1hhh = __riscv_vreinterpret_u16m1(__riscv_vget_u32m1(v2hh, 1));
// Save the originals of the four parts that the slide1up calls overwrite;
// the slide1down calls further down read these copies.
vuint16m1_t v1lllt = v1lll, v1lhlt = v1lhl, v1hllt = v1hll, v1hhlt = v1hhl;
// m is still modd here (assigned before the round 2 slide1up calls).
v1lll = __riscv_vslide1up_mu(m, v1lll, v1llh, 0, VL);
v1lhl = __riscv_vslide1up_mu(m, v1lhl, v1lhh, 0, VL);
v1hll = __riscv_vslide1up_mu(m, v1hll, v1hlh, 0, VL);
v1hhl = __riscv_vslide1up_mu(m, v1hhl, v1hhh, 0, VL);
m = meven;
v1llh = __riscv_vslide1down_mu(m, v1llh, v1lllt, 0, VL);
v1lhh = __riscv_vslide1down_mu(m, v1lhh, v1lhlt, 0, VL);
v1hlh = __riscv_vslide1down_mu(m, v1hlh, v1hllt, 0, VL);
v1hhh = __riscv_vslide1down_mu(m, v1hhh, v1hhlt, 0, VL);
// Reassemble the eight m1 parts into one m8 value, in the same order in
// which they were extracted.
return __riscv_vcreate_v_u16m1_u16m8(
v1lll, v1llh, v1lhl, v1lhh,
v1hll, v1hlh, v1hhl, v1hhh);
}
```
This is probably quite a hard problem, but I thought I'd share it anyway. Maybe slight improvements for this example can have a compounding effect on mask register allocation.
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJykV11vqzgQ_TXOy6gRGELIAw-9rbK62vZltcpqn5ABJ_ausbO2oU1__cqEkA8-Gt2oqFSD5_jMmWPoEGP4TlKaoMUPtHidkcoypZOclFQ85YWeZao4JH8xLigQEMpCRq2lGiwjEn57eUH4Bd7eNu-QV1pTacUB9loVVU4NENgpVQApVSUtqC1oWlSyINJCqWpqYKscEIWtEkJ9cLmDPzYb2FYyt1xJFDwDs3ZvUPCM8Brh9U4VmRJ2rvQO4fUXwmsm__6dFl_vK0A4DgOoDQQYuDRWVw2IQXiFvGd3Rd7xypH3XFdcWj8q49Qi79lqIk38Gae1EbygCMcXz6FuIZY_GqCV4V80tbB5AxS8QppqbvI6rQ21tSjJZ0qjsAwRjl1a0ObUmVLCj1JXeVHAdaamXFqq95raNPMjl9okHa9uWVmndfqZVnHpIxx7me8df1wLBlgclzkSx2uACq2pvKZSSmURjh1JB7t5G8k8xZqw08rV7LQKxXhtVStM93Dngn7UBGu3n3fL9QqaPQLtT0IL22DXoegWlE2kUaLLua2usYtf7dOycqq5fRxEc2NtQbcqtrhO-wtgNgRcqA95Bc3aHewwdldXgEvs6sJiqh1uVU-zKGyCbRmDDTmjT3XkDvTBnnTo7Ne5s--5s1_nzr7nLk6GwuLYM8y6CGPnpJ6Ew43vUIQYcVW_oBEk1vGZ9ueV73s-GjJ-S64pfpzlPUBMdJp9Y3Q_Kn0nuC8mne6W9foZ4CZ4IWq_oRf4U365B3_QMGf8Sbd_gz9u9wv8B_iPG77DZw_oz-7Qnz2gP7tDf_aA_uwO_dkD-rM79BfdG8f92Rwf56ku1h4p16dTjJ3WsfM6xs4fwIFDNXRaL_YTY9-8AX8PQ7GO-jhUz2qDUOfqJlj1uj4M1Yk3ymrwo95_aQy_k0_CHbs4IeGdaIyd-z-h4n1o7MSNTXHr2XsM7cSNTb7bNbWVlme0XFNiaVofT0rzO775N7nvw56drpffGuS2zydCy9fL4eFI8U_GDXDjRp2MZOIA_1XcugmJEV00UUGbirPKwk-wTFU7ZuEnwssCDCOaArdA5OGDHObwTg4ZBSO4W8PLvVY1Lam0pwGJG6CfpNwLCjmRwEjttspVuVeVLNzYRLdbmltQEkpi_gVNd9y4KY0IoXLi5qD5rEiCYhWsyIwm_nKBQy_wV_GMJf7SK5bxItoWK7ylJPeDxTIIokXobaOo8MiMJ9jDC2_pB74XrBbxnOJsFURRkC2iIvR8jEKPloSLuRB16YazGTemookfxiHGM0EyKkwzYWIs6Qc0TxHGbuDUiUt6yqqdQaEnuLHmDGO5FTRxMyGXVnNpeG5AKtsMjRk1dqjSWaVFcjM3csuqbJ6rEuG1Q29vT3ut_qG5RXjdcDIIr1vSdYL_DwAA___QLaqv">