<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/67595">67595</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            RISC-V generates worse code than AArch64 for simple memset style loop at -Os
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            backend:RISC-V,
            missed-optimization
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          hiraditya
      </td>
    </tr>
</table>

<pre>
    Derived from: https://github.com/llvm/llvm-project/issues/66652

```cpp
#include&lt;stdint.h&gt;
#include&lt;stddef.h&gt;
void fill_i16(int16_t* a, int16_t v, size_t l) {
  for (size_t i = 0; i &lt; l; i++) a[i] = v;
}
```


riscv-clang -Os -march=rv64gcv_zba_zbb_zbs

```asm
fill_i16:                               # @fill_i16
        beqz    a2, .LBB0_5
        not     a4, a2
        csrr    a7, vlenb
        bgeu    a4, a7, .LBB0_3
.LBB0_2:                                # =>This Inner Loop Header: Depth=1
        sh      a1, 0(a0)
        addi    a2, a2, -1
        addi    a0, a0, 2
        bnez    a2, .LBB0_2
        j       .LBB0_5
.LBB0_3:
        li      a4, 0
        srli    a6, a7, 3
        neg     a5, a7
        add     a3, a7, a2
        addi    a3, a3, -1
        and     a5, a5, a3
        vsetvli a3, zero, e16, m2, ta, ma
        vmv.v.x v8, a1
        slli    a1, a6, 4
        vsetvli zero, zero, e64, m8, ta, ma
        vid.v   v16
.LBB0_4:                                # =>This Inner Loop Header: Depth=1
        vsaddu.vx       v24, v16, a4
        vmsltu.vx       v0, v24, a2
        vse16.v v8, (a0), v0.t
        add     a4, a4, a7
        add     a0, a0, a1
        bne     a5, a4, .LBB0_4
.LBB0_5:
        ret
```


arm-clang -Os -march=armv8-a+sve

```asm
fill_i16:                               // @fill_i16
        cbz     x2, .LBB0_3
        cnth    x8
        mov     z0.h, w1
        mov     x10, xzr
        subs    x9, x2, x8
        csel    x9, xzr, x9, lo
        whilelo p0.h, xzr, x2
.LBB0_2:                                // =>This Inner Loop Header: Depth=1
        st1h    { z0.h }, p0, [x0, x10, lsl #1]
        whilelo p0.h, x10, x9
        add     x10, x10, x8
        b.mi    .LBB0_2
.LBB0_3:
        ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJykVl2P6yYT_jXkBsWywV-58EWyPqv3SEd6pbbqbYTNJOYU2ykQNptfX4HtJPbuntW2UcQE5mGGeWaYwLQWxw6gQMkOJeWKnU3Tq6IRinFhXtmq6vlrUYISFjg-qL5FdIsbY04a0S0iz4g8H4VpzlVQ9y0iz1LaSaxPqv8JtUHkWWh9Bo3Ic5qmCUFhicLtOKbh8K1Pp3GFUNHV8swB0SdtuOhM0CD67X0th8OD1vaC44OQci-iFJFcdCZK9waRLWaIPOFxjq2baHGFvcESkQ1G2W6wgPGhVxiRfNQKjGiJQ0R3_ucTlv4nIjv_3WCGkp1ASelxFtHRDsrKRYCzqP2ohK7tupasO-L1_zVet0zVDaKlsml8rO3-WrH9tar210q_yxnT7bByC5luMSIUozi8LY1hDZ8K_r46yYhjIPix24X7ZA7peuMlix2Ekbm21kp5bea0VkJXLTwc4fywPbv7oQNwmBB31E8-PhJaIvrtj0Zo_L3rQOEffX_C_wPGQTkTJZyMoyyan0I3g2SR8x8ikrMQkc0cxDgXdzKGcR19gAk9xo8LSqoO3nC6gPycUz3xQbdzmBT4gfpwEZIa1Cy9E0sXqYPjsD8ZIctQBknvBpbp9eGOAPouHx1_dJGMyBnGajBWTnauoHonIfIHbz1Lxl_Hli02tjawwQXb3Jtd5lSOBPicDjTEI2RyOTm7OU09lW3-C6eCB9bJ6a4M2Ylvd-mLFWg14_wc2Mtkn_gj2CF8Fi9D1tI8on2FjXtu2bEaojSwIzG3anbIMDAfpDke_f2qFB6qekl31cFjpuN7ccePPCX3KlZgPu15TLXvdTymWpuvGSI7beErvc79AX3c7urK30x8IW_a0A3SGd8rLvm43vbWK65h0LhdLwteJv0l8rRdrmpRpudKe73Pz-D5ZnxyqkE-YK7KCz-R_Rz60ggJssen8TgTmHy9mw5c_cuGaiJPE8p2nhns_uDIEz55ElCyuwxsDKRILd3diVBSfhLNSOLbzjzjeBQLFqugFfN--1FjfVuZK15QvqEbtoIiSjdJHtOQklVTQBRFnJOc1FVMIgCSxFCnSU7iNMv4IVmJgoSEhhuSRRu3J6AxRBsIyYFWdZ5lOYpDaJmQgXsEBb06rvzzp0izZJOsJKtAav_eIqRi9V_QcUS3v33__Wn9JyKuXBAhrdAa-Lo_GdGKKzOi75wuKVeq8G-r6nzUKA6l0EbfHRlhJBSDLXyEDhQzoPFLrzTguueATcM6vN2quklj_9TRoj1JwC20GgzW5lUClq4kmHFXdHVWsvgP7z0X8z8BAAD__1jTqkM">