<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/67595>67595</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
RISC-V generates worse code than AArch64 for a simple memset-style loop at -Os
</td>
</tr>
<tr>
<th>Labels</th>
<td>
backend:RISC-V,
missed-optimization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
hiraditya
</td>
</tr>
</table>
<pre>
Derived from: https://github.com/llvm/llvm-project/issues/66652
```cpp
#include&lt;stdint.h&gt;
#include&lt;stddef.h&gt;
void fill_i16(int16_t* a, int16_t v, size_t l) {
for (size_t i = 0; i < l; i++) a[i] = v;
}
```
riscv-clang -Os -march=rv64gcv_zba_zbb_zbs
```asm
fill_i16: # @fill_i16
beqz a2, .LBB0_5
not a4, a2
csrr a7, vlenb
bgeu a4, a7, .LBB0_3
.LBB0_2: # =>This Inner Loop Header: Depth=1
sh a1, 0(a0)
addi a2, a2, -1
addi a0, a0, 2
bnez a2, .LBB0_2
j .LBB0_5
.LBB0_3:
li a4, 0
srli a6, a7, 3
neg a5, a7
add a3, a7, a2
addi a3, a3, -1
and a5, a5, a3
vsetvli a3, zero, e16, m2, ta, ma
vmv.v.x v8, a1
slli a1, a6, 4
vsetvli zero, zero, e64, m8, ta, ma
vid.v v16
.LBB0_4: # =>This Inner Loop Header: Depth=1
vsaddu.vx v24, v16, a4
vmsltu.vx v0, v24, a2
vse16.v v8, (a0), v0.t
add a4, a4, a7
add a0, a0, a1
bne a5, a4, .LBB0_4
.LBB0_5:
ret
```
arm-clang -Os -march=armv8-a+sve
```asm
fill_i16: // @fill_i16
cbz x2, .LBB0_3
cnth x8
mov z0.h, w1
mov x10, xzr
subs x9, x2, x8
csel x9, xzr, x9, lo
whilelo p0.h, xzr, x2
.LBB0_2: // =>This Inner Loop Header: Depth=1
st1h { z0.h }, p0, [x0, x10, lsl #1]
whilelo p0.h, x10, x9
add x10, x10, x8
b.mi .LBB0_2
.LBB0_3:
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJykVl2P6yYT_jXkBsWywV-58EWyPqv3SEd6pbbqbYTNJOYU2ykQNptfX4HtJPbuntW2UcQE5mGGeWaYwLQWxw6gQMkOJeWKnU3Tq6IRinFhXtmq6vlrUYISFjg-qL5FdIsbY04a0S0iz4g8H4VpzlVQ9y0iz1LaSaxPqv8JtUHkWWh9Bo3Ic5qmCUFhicLtOKbh8K1Pp3GFUNHV8swB0SdtuOhM0CD67X0th8OD1vaC44OQci-iFJFcdCZK9waRLWaIPOFxjq2baHGFvcESkQ1G2W6wgPGhVxiRfNQKjGiJQ0R3_ucTlv4nIjv_3WCGkp1ASelxFtHRDsrKRYCzqP2ohK7tupasO-L1_zVet0zVDaKlsml8rO3-WrH9tar210q_yxnT7bByC5luMSIUozi8LY1hDZ8K_r46yYhjIPix24X7ZA7peuMlix2Ekbm21kp5bea0VkJXLTwc4fywPbv7oQNwmBB31E8-PhJaIvrtj0Zo_L3rQOEffX_C_wPGQTkTJZyMoyyan0I3g2SR8x8ikrMQkc0cxDgXdzKGcR19gAk9xo8LSqoO3nC6gPycUz3xQbdzmBT4gfpwEZIa1Cy9E0sXqYPjsD8ZIctQBknvBpbp9eGOAPouHx1_dJGMyBnGajBWTnauoHonIfIHbz1Lxl_Hli02tjawwQXb3Jtd5lSOBPicDjTEI2RyOTm7OU09lW3-C6eCB9bJ6a4M2Ylvd-mLFWg14_wc2Mtkn_gj2CF8Fi9D1tI8on2FjXtu2bEaojSwIzG3anbIMDAfpDke_f2qFB6qekl31cFjpuN7ccePPCX3KlZgPu15TLXvdTymWpuvGSI7beErvc79AX3c7urK30x8IW_a0A3SGd8rLvm43vbWK65h0LhdLwteJv0l8rRdrmpRpudKe73Pz-D5ZnxyqkE-YK7KCz-R_Rz60ggJssen8TgTmHy9mw5c_cuGaiJPE8p2nhns_uDIEz55ElCyuwxsDKRILd3diVBSfhLNSOLbzjzjeBQLFqugFfN--1FjfVuZK15QvqEbtoIiSjdJHtOQklVTQBRFnJOc1FVMIgCSxFCnSU7iNMv4IVmJgoSEhhuSRRu3J6AxRBsIyYFWdZ5lOYpDaJmQgXsEBb06rvzzp0izZJOsJKtAav_eIqRi9V_QcUS3v33__Wn9JyKuXBAhrdAa-Lo_GdGKKzOi75wuKVeq8G-r6nzUKA6l0EbfHRlhJBSDLXyEDhQzoPFLrzTguueATcM6vN2quklj_9TRoj1JwC20GgzW5lUClq4kmHFXdHVWsvgP7z0X8z8BAAD__1jTqkM">