[llvm] [RISCV] Reorder the vector register allocation sequence. (PR #69290)
Shao-Ce SUN via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 16 23:08:00 PDT 2023
sunshaoce wrote:
> An example source:
>
> ```c
> void vec(float *restrict out,
> float *in1,
> float *in2,
> float *in3,
> size_t n) {
> size_t vl;
> for (size_t i = 0; i < n; i += vl) {
> vl = __riscv_vsetvl_e32m8(n);
> vfloat32m8_t v1 = __riscv_vle32_v_f32m8(&in1[i], vl);
> vfloat32m8_t v2 = __riscv_vle32_v_f32m8(&in2[i], vl);
> vbool4_t true_mask = __riscv_vmfgt(v1, v2, vl);
> vbool4_t false_mask = __riscv_vmnot(true_mask, vl);
> vfloat32m8_t v3 = __riscv_vle32_v_f32m8_m(false_mask, &in3[i], vl);
> vfloat32m8_t val = __riscv_vfsub(true_mask, v1, v2, vl);
> val = __riscv_vfsub_mu(false_mask, val, v2, v3, vl);
> __riscv_vse32(&out[i], val, vl);
> }
> }
> ```
>
> before:
>
> ```assembly
> vec: # @vec
> # %bb.0: # %entry
> beqz a4, .LBB0_4
> # %bb.1: # %for.body.lr.ph
> addi sp, sp, -16
> csrr a5, vlenb
> slli a5, a5, 4
> sub sp, sp, a5
> li a5, 0
> vsetvli a6, a4, e32, m8, ta, ma
> slli a7, a6, 2
> .LBB0_2: # %for.body
> # =>This Inner Loop Header: Depth=1
> vsetvli zero, a6, e32, m8, ta, ma
> vle32.v v24, (a1)
> addi t0, sp, 16
> vs8r.v v24, (t0) # Unknown-size Folded Spill
> vle32.v v16, (a2)
> vmflt.vv v8, v16, v24
> vmnot.m v9, v8
> vmv1r.v v0, v9
> vle32.v v24, (a3), v0.t
> csrr t0, vlenb
> slli t0, t0, 3
> add t0, t0, sp
> addi t0, t0, 16
> vs8r.v v24, (t0) # Unknown-size Folded Spill
> vmv1r.v v0, v8
> addi t0, sp, 16
> vl8r.v v24, (t0) # Unknown-size Folded Reload
> vfsub.vv v24, v24, v16, v0.t
> vsetvli zero, zero, e32, m8, ta, mu
> vmv1r.v v0, v9
> csrr t0, vlenb
> slli t0, t0, 3
> add t0, t0, sp
> addi t0, t0, 16
> vl8r.v v8, (t0) # Unknown-size Folded Reload
> vfsub.vv v24, v16, v8, v0.t
> vse32.v v24, (a0)
> add a5, a5, a6
> add a0, a0, a7
> add a3, a3, a7
> add a2, a2, a7
> add a1, a1, a7
> bltu a5, a4, .LBB0_2
> # %bb.3:
> csrr a0, vlenb
> slli a0, a0, 4
> add sp, sp, a0
> addi sp, sp, 16
> .LBB0_4: # %for.cond.cleanup
> ret
> ```
>
> after:
>
> ```assembly
> vec: # @vec
> # %bb.0: # %entry
> beqz a4, .LBB0_3
> # %bb.1: # %for.body.lr.ph
> li a5, 0
> vsetvli a6, a4, e32, m8, ta, ma
> slli a7, a6, 2
> .LBB0_2: # %for.body
> # =>This Inner Loop Header: Depth=1
> vsetvli zero, a6, e32, m8, ta, mu
> vle32.v v16, (a1)
> vle32.v v8, (a2)
> vmflt.vv v1, v8, v16
> vmnot.m v2, v1
> vmv1r.v v0, v2
> vle32.v v24, (a3), v0.t
> vmv1r.v v0, v1
> vfsub.vv v16, v16, v8, v0.t
> vmv1r.v v0, v2
> vfsub.vv v16, v8, v24, v0.t
> vse32.v v16, (a0)
> add a5, a5, a6
> add a0, a0, a7
> add a3, a3, a7
> add a2, a2, a7
> add a1, a1, a7
> bltu a5, a4, .LBB0_2
> .LBB0_3: # %for.cond.cleanup
> ret
> ```
I think this can be added to the test cases.
https://github.com/llvm/llvm-project/pull/69290
More information about the llvm-commits
mailing list