[llvm] [RISCV] Reorder the vector register allocation sequence. (PR #69290)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 16 23:05:04 PDT 2023
yanming123456 wrote:
An example:
source:
```c
/*
 * Element-wise select-and-subtract over three float arrays using RVV
 * intrinsics at SEW=32, LMUL=8:
 *
 *   out[k] = (in1[k] > in2[k]) ? in1[k] - in2[k] : in2[k] - in3[k]
 *
 * Four LMUL=8 vector values (v1, v2, v3, val) plus two mask values are
 * live inside the loop body.
 */
void vec(float *restrict out,
float *in1,
float *in2,
float *in3,
size_t n) {
size_t vl;
for (size_t i = 0; i < n; i += vl) {
// NOTE(review): vl is requested for the full n on every iteration, not
// for the remaining n - i, so the last iteration may touch elements
// at or past index n -- confirm whether this is intentional for the example.
vl = __riscv_vsetvl_e32m8(n);
vfloat32m8_t v1 = __riscv_vle32_v_f32m8(&in1[i], vl);
vfloat32m8_t v2 = __riscv_vle32_v_f32m8(&in2[i], vl);
// true_mask: lanes where v1 > v2; false_mask is its complement.
vbool4_t true_mask = __riscv_vmfgt(v1, v2, vl);
vbool4_t false_mask = __riscv_vmnot(true_mask, vl);
// in3 is loaded only under false_mask, the lanes where it is consumed below.
vfloat32m8_t v3 = __riscv_vle32_v_f32m8_m(false_mask, &in3[i], vl);
// Lanes under true_mask: val = v1 - v2.
vfloat32m8_t val = __riscv_vfsub(true_mask, v1, v2, vl);
// Lanes under false_mask: val = v2 - v3; the _mu form passes val as the
// merge operand so the lanes computed above are kept.
val = __riscv_vfsub_mu(false_mask, val, v2, v3, vl);
__riscv_vse32(&out[i], val, vl);
}
}
```
Generated code before this patch:
```asm
vec: # @vec
# %bb.0: # %entry
beqz a4, .LBB0_4
# %bb.1: # %for.body.lr.ph
addi sp, sp, -16
csrr a5, vlenb
slli a5, a5, 4
sub sp, sp, a5
li a5, 0
vsetvli a6, a4, e32, m8, ta, ma
slli a7, a6, 2
.LBB0_2: # %for.body
# =>This Inner Loop Header: Depth=1
vsetvli zero, a6, e32, m8, ta, ma
vle32.v v24, (a1)
addi t0, sp, 16
vs8r.v v24, (t0) # Unknown-size Folded Spill
vle32.v v16, (a2)
vmflt.vv v8, v16, v24
vmnot.m v9, v8
vmv1r.v v0, v9
vle32.v v24, (a3), v0.t
csrr t0, vlenb
slli t0, t0, 3
add t0, t0, sp
addi t0, t0, 16
vs8r.v v24, (t0) # Unknown-size Folded Spill
vmv1r.v v0, v8
addi t0, sp, 16
vl8r.v v24, (t0) # Unknown-size Folded Reload
vfsub.vv v24, v24, v16, v0.t
vsetvli zero, zero, e32, m8, ta, mu
vmv1r.v v0, v9
csrr t0, vlenb
slli t0, t0, 3
add t0, t0, sp
addi t0, t0, 16
vl8r.v v8, (t0) # Unknown-size Folded Reload
vfsub.vv v24, v16, v8, v0.t
vse32.v v24, (a0)
add a5, a5, a6
add a0, a0, a7
add a3, a3, a7
add a2, a2, a7
add a1, a1, a7
bltu a5, a4, .LBB0_2
# %bb.3:
csrr a0, vlenb
slli a0, a0, 4
add sp, sp, a0
addi sp, sp, 16
.LBB0_4: # %for.cond.cleanup
ret
```
Generated code after this patch:
```asm
vec: # @vec
# %bb.0: # %entry
beqz a4, .LBB0_3
# %bb.1: # %for.body.lr.ph
li a5, 0
vsetvli a6, a4, e32, m8, ta, ma
slli a7, a6, 2
.LBB0_2: # %for.body
# =>This Inner Loop Header: Depth=1
vsetvli zero, a6, e32, m8, ta, mu
vle32.v v16, (a1)
vle32.v v8, (a2)
vmflt.vv v1, v8, v16
vmnot.m v2, v1
vmv1r.v v0, v2
vle32.v v24, (a3), v0.t
vmv1r.v v0, v1
vfsub.vv v16, v16, v8, v0.t
vmv1r.v v0, v2
vfsub.vv v16, v8, v24, v0.t
vse32.v v16, (a0)
add a5, a5, a6
add a0, a0, a7
add a3, a3, a7
add a2, a2, a7
add a1, a1, a7
bltu a5, a4, .LBB0_2
.LBB0_3: # %for.cond.cleanup
ret
```
https://github.com/llvm/llvm-project/pull/69290
More information about the llvm-commits
mailing list