[llvm-branch-commits] [RISCV] Enable ShouldTrackLaneMasks when vector instructions are available (PR #115843)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Nov 12 01:55:18 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Pengcheng Wang (wangpc-pp)
Changes:
This can help reduce register pressure in LMUL>1 cases.
---
Patch is 202.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115843.diff
40 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSubtarget.cpp (+4)
- (modified) llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll (+22-23)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll (+16-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll (+10-10)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (+121-133)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll (+9-10)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+26-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll (+18-16)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll (+127-116)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll (+18-18)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll (+59-70)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll (+14-14)
- (modified) llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll (+212-244)
- (modified) llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll (+30-30)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll (+10-10)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll (+22-22)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll (+108-44)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-constrained-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-constrained-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll (+16-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-constrained-sdnode.ll (+4-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll (+4-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-constrained-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll (+18-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll (+16-16)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 03397e1e0d89ee..3eae2b9774203f 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -211,4 +211,8 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
// Spilling is generally expensive on all RISC-V cores, so always enable
// register-pressure tracking. This will increase compile time.
Policy.ShouldTrackPressure = true;
+
+ // Enable ShouldTrackLaneMasks when vector instructions are supported.
+ // TODO: Add extensions that need register pairs as well?
+ Policy.ShouldTrackLaneMasks = hasVInstructions();
}
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 26e86d41176e04..5a38ec36068f93 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -24,31 +24,31 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
-; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
-; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
-; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
; CHECK-NEXT: #APP
@@ -58,27 +58,26 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, 1048572
; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v12, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v14, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
+; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vl2r.v v12, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vl2r.v v14, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v14, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl1r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
-; CHECK-NEXT: vsext.vf2 v8, v14, v0.t
-; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
-; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v14, (a0)
+; CHECK-NEXT: vsext.vf2 v8, v16, v0.t
; CHECK-NEXT: lui a0, %hi(var_47)
; CHECK-NEXT: addi a0, a0, %lo(var_47)
; CHECK-NEXT: vsseg4e16.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
index ce83e2d8a62206..fea88673084a29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
@@ -16,33 +16,33 @@ define <512 x i8> @single_source(<512 x i8> %a) {
; CHECK-NEXT: addi s0, sp, 1536
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: andi sp, sp, -512
-; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: addi a1, sp, 512
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.x.s a2, v16
-; CHECK-NEXT: vslidedown.vi v24, v16, 5
+; CHECK-NEXT: vmv.x.s a2, v8
+; CHECK-NEXT: vslidedown.vi v24, v8, 5
; CHECK-NEXT: li a3, 432
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
-; CHECK-NEXT: vmv.v.x v8, a2
-; CHECK-NEXT: lbu a0, 770(sp)
-; CHECK-NEXT: li a1, 431
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: lbu a0, 1012(sp)
+; CHECK-NEXT: li a0, 431
+; CHECK-NEXT: vmv.v.x v16, a2
+; CHECK-NEXT: lbu a1, 770(sp)
+; CHECK-NEXT: vslide1down.vx v16, v16, a1
+; CHECK-NEXT: lbu a1, 1012(sp)
; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v24, a1
+; CHECK-NEXT: vslideup.vx v16, v24, a0
; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v16, 4
-; CHECK-NEXT: li a1, 466
-; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: li a0, 465
+; CHECK-NEXT: vslidedown.vi v24, v8, 4
+; CHECK-NEXT: li a0, 466
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: li a1, 465
; CHECK-NEXT: li a2, 501
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v16, v24, a1
; CHECK-NEXT: li a0, 500
; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: vslideup.vx v16, v8, a0
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: addi sp, s0, -1536
; CHECK-NEXT: .cfi_def_cfa sp, 1536
; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index dbbb8362144cab..b94a523e130440 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -502,17 +502,17 @@ define <8 x i32> @add_constant_rhs_8xi32_vector_in2(<8 x i32> %vin, i32 %a, i32
; CHECK-NEXT: addi a1, a1, 25
; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: addi a3, a3, 2047
-; CHECK-NEXT: addi a3, a3, 308
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: addi a0, a3, 308
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 6
-; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: ret
@@ -534,15 +534,15 @@ define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32
; CHECK-NEXT: addi a1, a1, 25
; CHECK-NEXT: addi a2, a2, 1
; CHECK-NEXT: addi a3, a3, 2047
-; CHECK-NEXT: addi a3, a3, 308
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: addi a0, a3, 308
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vmv.s.x v10, a3
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index ae5dbfa4bf30ba..6a8d98d55289bf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -17,25 +17,25 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
; CHECK-NEXT: vlm.v v8, (a0)
; CHECK-NEXT: li a0, -256
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vadd.vv v11, v9, v9
+; CHECK-NEXT: vadd.vv v12, v9, v9
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vadd.vi v12, v11, -16
+; CHECK-NEXT: vadd.vi v13, v12, -16
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v8, 2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT: vadd.vi v11, v11, -15
-; CHECK-NEXT: vmerge.vim v13, v10, 1, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vadd.vi v12, v12, -15
; CHECK-NEXT: vmerge.vim v14, v10, 1, v0
-; CHECK-NEXT: vnsrl.wi v8, v14, 0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vnsrl.wi v8, v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vrgather.vv v8, v13, v12, v0.t
-; CHECK-NEXT: vnsrl.wi v12, v14, 8
+; CHECK-NEXT: vrgather.vv v8, v14, v13, v0.t
+; CHECK-NEXT: vnsrl.wi v13, v10, 8
; CHECK-NEXT: vmsne.vi v10, v8, 0
-; CHECK-NEXT: vrgather.vv v12, v13, v11, v0.t
-; CHECK-NEXT: vmsne.vi v8, v12, 0
+; CHECK-NEXT: vrgather.vv v13, v14, v12, v0.t
+; CHECK-NEXT: vmsne.vi v8, v13, 0
; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
%vec = load <32 x i1>, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index b0f8bc9dcc6bd5..127428f8d5a299 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1266,19 +1266,16 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double
define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7, double %e8, double %e9, double %e10, double %e11, double %e12, double %e13, double %e14, double %e15, double %e16, double %e17, double %e18, double %e19, double %e20, double %e21, double %e22, double %e23, double %e24, double %e25, double %e26, double %e27, double %e28, double %e29, double %e30, double %e31) vscale_range(2,2) {
; RV32-LABEL: buildvec_v32f64_exact_vlen:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -96
-; RV32-NEXT: .cfi_def_cfa_offset 96
-; RV32-NEXT: fsd fs0, 88(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs1, 80(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs2, 72(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs3, 64(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs4, 56(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs5, 48(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs6, 40(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs7, 32(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs8, 24(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs9, 16(sp) # 8-byte Folded Spill
-; RV32-NEXT: fsd fs10, 8(sp) # 8-byte Folded Spill
+; RV32-NEXT: addi sp, sp, -80
+; RV32-NEXT: .cfi_def_cfa_offset 80
+; RV32-NEXT: fsd fs0, 72(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs1, 64(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs2, 56(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs3, 48(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs4, 40(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs5, 32(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs6, 24(sp) # 8-byte Folded Spill
+; RV32-NEXT: fsd fs7, 16(sp) # 8-byte Folded Spill
; RV32-NEXT: .cfi_offset fs0, -8
; RV32-NEXT: .cfi_offset fs1, -16
; RV32-NEXT: .cfi_offset fs2, -24
@@ -1287,85 +1284,79 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV32-NEXT: .cfi_offset fs5, -48
; RV32-NEXT: .cfi_offset fs6, -56
; RV32-NEXT: .cfi_offset fs7, -64
-; RV32-NEXT: .cfi_offset fs8, -72
-; RV32-NEXT: .cfi_offset fs9, -80
-; RV32-NEXT: .cfi_offset fs10, -88
-; RV32-NEXT: sw a6, 0(sp)
-; RV32-NEXT: sw a7, 4(sp)
-; RV32-NEXT: fld ft0, 248(sp)
-; RV32-NEXT: fld ft1, 240(sp)
-; RV32-NEXT: fld ft2, 232(sp)
-; RV32-NEXT: fld ft3, 224(sp)
-; RV32-NEXT: fld ft6, 216(sp)
-; RV32-NEXT: fld ft8, 208(sp)
-; RV32-NEXT: fld ft10, 200(sp)
-; RV32-NEXT: fld fs1, 192(sp)
-; RV32-NEXT: fld ft11, 184(sp)
-; RV32-NEXT: fld fs4, 176(sp)
-; RV32-NEXT: fld fs2, 168(sp)
-; RV32-NEXT: fld fs5, 160(sp)
-; RV32-NEXT: fld fs3, 136(sp)
-; RV32-NEXT: fld fs6, 128(sp)
-; RV32-NEXT: fld fs7, 152(sp)
-; RV32-NEXT: fld fs8, 144(sp)
-; RV32-NEXT: fld ft4, 120(sp)
-; RV32-NEXT: fld ft5, 112(sp)
-; RV32-NEXT: fld ft7, 104(sp)
-; RV32-NEXT: fld ft9, 96(sp)
+; RV32-NEXT: sw a6, 8(sp)
+; RV32-NEXT: sw a7, 12(sp)
+; RV32-NEXT: fld ft0, 232(sp)
+; RV32-NEXT: fld ft4, 224(sp)
+; RV32-NEXT: fld ft1, 216(sp)
+; RV32-NEXT: fld ft7, 208(sp)
+; RV32-NEXT: fld ft2, 200(sp)
+; RV32-NEXT: fld ft10, 192(sp)
+; RV32-NEXT: fld ft3, 184(sp)
+; RV32-NEXT: fld fs1, 176(sp)
+; RV32-NEXT: fld ft5, 168(sp)
+; RV32-NEXT: fld fs2, 160(sp)
+; RV32-NEXT: fld ft6, 152(sp)
+; RV32-NEXT: fld fs3, 144(sp)
+; RV32-NEXT: fld ft8, 120(sp)
+; RV32-NEXT: fld fs4, 112(sp)
+; RV32-NEXT: fld ft9, 136(sp)
+; RV32-NEXT: fld fs5, 128(sp)
+; RV32-NEXT: fld ft11, 104(sp)
+; RV32-NEXT: fld fs6, 96(sp)
+; RV32-NEXT: fld fs0, 88(sp)
+; RV32-NEXT: fld fs7, 80(sp)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vfmv.v.f v8, fa2
-; RV32-NEXT: fld fa2, 0(sp)
-; RV32-NEXT: sw a4, 0(sp)
-; RV32-NEXT: sw a5, 4(sp)
-; RV32-NEXT: fld fs0, 0(sp)
-; RV32-NEXT: sw a2, 0(sp)
-; RV32-NEXT: sw a3, 4(sp)
-; RV32-NEXT: fld fs9, 0(sp)
-; RV32-NEXT: sw a0, 0(sp)
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: fld fs10, 0(sp)
-; RV32-NEXT: vfmv.v.f v9, fs8
-; RV32-NEXT: vfmv.v.f v10, fs6
-; RV32-NEXT: vfmv.v.f v11, fs5
-; RV32-NEXT: vfmv.v.f v12, fs4
-; RV32-NEXT: vfmv.v.f v13, fs1
-; RV32-NEXT: vfslide1down.vf v17, v9, fs7
-; RV32-NEXT: vfslide1down.vf v16, v10, fs3
-; RV32-NEXT: vfslide1down.vf v18, v11, fs2
-; RV32-NEXT: vfmv.v.f v9, fs10
-; RV32-NEXT: vfslide1down.vf v19, v12, ft11
-; RV32-NEXT: vfslide1down.vf v20, v13, ft10
-; RV32-NEXT: vfslide1down.vf v12, v9, fs9
+; RV32-NEXT: vfmv.v.f v10, fa0
+; RV32-NEXT: vfmv.v.f v11, fa4
+; RV32-NEXT: vfmv.v.f v12, fa6
+; RV32-NEXT: fld fa4, 8(sp)
+; RV32-NEXT: sw a4, 8(sp)
+; RV32-NEXT: sw a5, 12(sp)
; RV32-NEXT: vfslide1down.vf v9, v8, fa3
-; RV32-NEXT: vfmv.v.f v8, ft8
-; RV32-NEXT: vfslide1down.vf v21, v8, ft6
-; RV32-NEXT: vfmv.v.f v8, fa0
-; RV32-NEXT: vfslide1down.vf v8, v8, fa1
-; RV32-NEXT: vfmv.v.f v10, ft3
-; RV32-NEXT: vfslide1down.vf v22, v10, ft2
-; RV32-NEXT: vfmv.v.f v10, fa4
-; RV32-NEXT: vfslide1down.vf v10, v10, fa5
-; RV32-NEXT: vfmv.v.f v11, fa6
-; RV32-NEXT: vfslide1down.vf v11, v11, fa7
-; RV32-NEXT: vfmv.v.f v13, fs0
-; RV32-NEXT: vfslide1down.vf v13, v13, fa2
-; RV32-NEXT: vfmv.v.f v14, ft9
-; RV32-NEXT: vfslide1down.vf v14, v14, ft7
-; RV32-NEXT: vfmv.v.f v15, ft5
-; RV32-NEXT: vfslide1down.vf v15, v15, ft4
-; RV32-NEXT: vfmv.v.f v23, ft1
-; RV32-NEXT: vfslide1down.vf v23, v23, ft0
-; RV32-NEXT: fld fs0, 88(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs1, 80(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs2, 72(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs3, 64(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs4, 56(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs5, 48(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs6, 40(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs7, 32(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs8, 24(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs9, 16(sp) # 8-byte Folded Reload
-; RV32-NEXT: fld fs10, 8(sp) # 8-byte Folded Reload
+; RV32-NEXT: vfslide1down.vf v8, v10, fa1
+; RV32-NEXT: vfslide1down.vf v10, v11, fa5
+; RV32-NEXT: vfslide1down.vf v11, v12, fa7
+; RV32-NEXT: fld fa5, 8(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a3, 12(sp)
+; RV32-NEXT: fld fa3, 8(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: fld fa2, 8(sp)
+; RV32-NEXT: vfmv.v.f v12, fs7
+; RV32-NEXT: vfmv.v.f v13, fs6
+; RV32-NEXT: vfmv.v.f v16, fs5
+; RV32-NEXT: vfmv.v.f v18, fs4
+; RV32-NEXT: vfmv.v.f v19, fs3
+; RV32-NEXT: vfmv.v.f v20, fs2
+; RV32-NEXT: vfmv.v.f v21, fs1
+; RV32-NEXT: vfmv.v.f v22, ft10
+; RV32-NEXT: vfmv.v.f v23, ft7
+; RV32-NEXT: vfmv.v.f v24, ft4
+; RV32-NEXT: vfslide1down.vf v14, v12, fs0
+; RV32-NEXT: vfslide1down.vf v15, v13, ft11
+; RV32-NEXT: vfslide1down.vf v17, v16, ft9
+; RV32-NEXT: vfslide1down.vf v16, v18, ft8
+; RV32-NEXT: vfslide1down.vf v18, v19, ft6
+; RV32-NEXT: vfslide1down.vf v19, v20, ft5
+; RV32-NEXT: vfslide1down.vf v20, v21, ft3
+; RV32-NEXT: vfslide1down.vf v21, v22, ft2
+; RV32-NEXT: vfslide1down.vf v22, v23, ft1
+; RV32-NEXT: vfmv.v.f v12, fa5
+; RV32-NEXT: vfslide1down.vf v13, v12, fa4
+; RV32-NEXT: vfmv.v.f v12, fa2
+; RV32-NEXT: vfslide1down.vf v12, v12, fa3
+; RV32-NEXT: vfslide1down.vf v23, v24, ft0
+; RV32-NEXT: fld fs0, 72(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs1, 64(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs2, 56(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs3, 48(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs4, 40(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs5, 32(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs6, 24(sp) # 8-byte Folded Reload
+; RV32-NEXT: fld fs7, 16(sp) # 8-byte Folded Reload
; RV32-NEXT: .cfi_restore fs0
; RV32-NEXT: .cfi_restore fs1
; RV32-NEXT: .cfi_restore fs2
@@ -1374,10 +1365,7 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV32-NEXT: .cfi_restore fs5
; RV32-NEXT: .cfi_restore fs6
; RV32-NEXT: .cfi_restore fs7
-; RV32-NEXT: .cfi_restore fs8
-; RV32-NEXT: .cfi_restore fs9
-; RV32-NEXT: .cfi_restore fs10
-; RV32-NEXT: addi sp, sp, 96
+; RV32-NEXT: addi sp, sp, 80
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
@@ -1401,25 +1389,25 @@ define <32 x double> @buildvec_v32f64_exact_vlen(double %e0, double %e1, double
; RV64-NEXT: .cfi_offset fs5, -48
; RV64-NEXT: .cfi_offset fs6, -56
; RV64-NEXT: .cfi_offset fs7, -64
-; RV64-NEXT: fmv.d.x ft6, a7
-; RV64-NEXT: fmv.d.x ft9, a5
-; RV64-NEXT: fmv.d.x ft10, a3
-; RV64-NEXT: fmv.d.x ft11, a1
+; RV64-NEXT: fmv.d.x ft11, a7
+; RV64-NEXT: fmv.d.x fs0, a5
+; RV64-NEXT: fmv.d.x fs1, a3
+; RV64-NEXT: fmv.d.x fs2, a1
; RV64-NEXT: fld ft0, 184(sp)
-; RV64-NEXT: fld ft1, 176(sp)
-; RV64-NEXT: fld ft2, 168(sp)
-; RV64-NEXT: fld ft3, 160(sp)
-; RV64-NEXT: fld ft4, 152(sp)
-; RV64-NEXT: fld ft5, 144(sp)
-; RV64-NEXT: fld ft7, 136(sp)
-; RV64-NEXT: fld ft8, 128(sp)
-; RV64-NEXT: fld fs0, 120(sp)
-; RV64-NEXT...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/115843