[llvm] [RISCV] Rematerialize vmv.s.x and vfmv.s.f (PR #108012)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 10 18:43:53 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/108012
From e608957ed5c5fcf5598e9a46df766b2b75a16a6c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 10 Sep 2024 19:55:52 +0800
Subject: [PATCH 1/2] Precommit tests
---
llvm/test/CodeGen/RISCV/rvv/remat.ll | 94 ++++++++++++++++++++++++++++
1 file changed, 94 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 343b086898c143..638f0623d382e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -377,3 +377,97 @@ define void @vfmv.v.f(ptr %p, double %x) {
store volatile double %x, ptr %p
ret void
}
+
+define void @vmv.s.x(ptr %p, i64 %x) {
+; CHECK-LABEL: vmv.s.x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %vmv.s.x = call <vscale x 8 x i64> @llvm.riscv.vmv.s.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
+ store volatile <vscale x 8 x i64> %vmv.s.x, ptr %p
+
+ %a = load volatile <vscale x 8 x i64>, ptr %p
+ %b = load volatile <vscale x 8 x i64>, ptr %p
+ %c = load volatile <vscale x 8 x i64>, ptr %p
+ %d = load volatile <vscale x 8 x i64>, ptr %p
+ store volatile <vscale x 8 x i64> %d, ptr %p
+ store volatile <vscale x 8 x i64> %c, ptr %p
+ store volatile <vscale x 8 x i64> %b, ptr %p
+ store volatile <vscale x 8 x i64> %a, ptr %p
+
+ store volatile <vscale x 8 x i64> %vmv.s.x, ptr %p
+ store volatile i64 %x, ptr %p
+ ret void
+}
+
+define void @vfmv.s.f(ptr %p, double %x) {
+; CHECK-LABEL: vfmv.s.f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: fsd fa0, 0(a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %vfmv.s.f = call <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
+ store volatile <vscale x 8 x double> %vfmv.s.f, ptr %p
+
+ %a = load volatile <vscale x 8 x double>, ptr %p
+ %b = load volatile <vscale x 8 x double>, ptr %p
+ %c = load volatile <vscale x 8 x double>, ptr %p
+ %d = load volatile <vscale x 8 x double>, ptr %p
+ store volatile <vscale x 8 x double> %d, ptr %p
+ store volatile <vscale x 8 x double> %c, ptr %p
+ store volatile <vscale x 8 x double> %b, ptr %p
+ store volatile <vscale x 8 x double> %a, ptr %p
+
+ store volatile <vscale x 8 x double> %vfmv.s.f, ptr %p
+ store volatile double %x, ptr %p
+ ret void
+}
From 176055a468db7013d9aee01d6641e6303a014213 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 10 Sep 2024 20:03:59 +0800
Subject: [PATCH 2/2] [RISCV] Rematerialize vmv.s.x and vfmv.s.f
Continuing on from #107993 and #108007, this handles the last of the main rematerializable vector instructions: vmv.s.x and vfmv.s.f are now marked as trivially rematerializable when their passthru operand is undef. Spill and reload statistics are below, followed by a sketch of the effect on generated code.
Program              regalloc.NumSpills         regalloc.NumReloads        regalloc.NumRemats
                     lhs       rhs       diff   lhs       rhs       diff   lhs       rhs       diff
508.namd_r           6598.00   6598.00   0.0%   15509.00  15509.00  0.0%   2387.00   2387.00   0.0%
505.mcf_r            141.00    141.00    0.0%   372.00    372.00    0.0%   36.00     36.00     0.0%
641.leela_s          356.00    356.00    0.0%   525.00    525.00    0.0%   117.00    117.00    0.0%
631.deepsjeng_s      353.00    353.00    0.0%   682.00    682.00    0.0%   124.00    124.00    0.0%
623.xalancbmk_s      1548.00   1548.00   0.0%   2466.00   2466.00   0.0%   620.00    620.00    0.0%
620.omnetpp_s        946.00    946.00    0.0%   1485.00   1485.00   0.0%   1178.00   1178.00   0.0%
605.mcf_s            141.00    141.00    0.0%   372.00    372.00    0.0%   36.00     36.00     0.0%
557.xz_r             289.00    289.00    0.0%   505.00    505.00    0.0%   172.00    172.00    0.0%
541.leela_r          356.00    356.00    0.0%   525.00    525.00    0.0%   117.00    117.00    0.0%
531.deepsjeng_r      353.00    353.00    0.0%   682.00    682.00    0.0%   124.00    124.00    0.0%
520.omnetpp_r        946.00    946.00    0.0%   1485.00   1485.00   0.0%   1178.00   1178.00   0.0%
523.xalancbmk_r      1548.00   1548.00   0.0%   2466.00   2466.00   0.0%   620.00    620.00    0.0%
619.lbm_s            68.00     68.00     0.0%   70.00     70.00     0.0%   1.00      1.00      0.0%
519.lbm_r            73.00     73.00     0.0%   75.00     75.00     0.0%   1.00      1.00      0.0%
657.xz_s             289.00    289.00    0.0%   505.00    505.00    0.0%   172.00    172.00    0.0%
511.povray_r         1937.00   1936.00   -0.1%  3629.00   3628.00   -0.0%  517.00    518.00    0.2%
502.gcc_r            12450.00  12442.00  -0.1%  27328.00  27317.00  -0.0%  9409.00   9409.00   0.0%
602.gcc_s            12450.00  12442.00  -0.1%  27328.00  27317.00  -0.0%  9409.00   9409.00   0.0%
638.imagick_s        4181.00   4178.00   -0.1%  11342.00  11338.00  -0.0%  3366.00   3368.00   0.1%
538.imagick_r        4181.00   4178.00   -0.1%  11342.00  11338.00  -0.0%  3366.00   3368.00   0.1%
500.perlbench_r      4178.00   4175.00   -0.1%  9162.00   9159.00   -0.0%  2410.00   2410.00   0.0%
600.perlbench_s      4178.00   4175.00   -0.1%  9162.00   9159.00   -0.0%  2410.00   2410.00   0.0%
525.x264_r           1886.00   1884.00   -0.1%  4561.00   4559.00   -0.0%  471.00    471.00    0.0%
625.x264_s           1886.00   1884.00   -0.1%  4561.00   4559.00   -0.0%  471.00    471.00    0.0%
510.parest_r         42740.00  42689.00  -0.1%  82400.00  82252.00  -0.2%  5612.00   5620.00   0.1%
644.nab_s            753.00    752.00    -0.1%  1183.00   1182.00   -0.1%  318.00    318.00    0.0%
544.nab_r            753.00    752.00    -0.1%  1183.00   1182.00   -0.1%  318.00    318.00    0.0%
526.blender_r        13105.00  13084.00  -0.2%  26478.00  26442.00  -0.1%  18991.00  18989.00  -0.0%
Geomean difference                       -0.0%                      -0.0%                      0.0%
There's an extra spill in one of the test cases, but it's likely noise from the spill weights and isn't an issue in practice.
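To illustrate the effect on generated code, here is a simplified sketch (illustrative only, not copied from the tests; remat.ll below has the real before/after). With the pseudos marked rematerializable, the register allocator can re-execute the scalar move at a later use instead of keeping its result live, or spilling and reloading it, across a region of high vector register pressure:

    # Before: the vmv.s.x result is kept alive across the high-pressure
    # region by spilling and reloading it.
    vmv.s.x v8, a1
    vs8r.v  v8, (a2)        # spill
    ...                     # high register pressure
    vl8r.v  v8, (a2)        # reload
    vs8r.v  v8, (a0)

    # After: the result is rematerialized from a1 at the use, so no stack
    # slot is needed for it.
    vmv.s.x v8, a1
    ...                     # high register pressure
    vmv.s.x v8, a1          # rematerialized
    vs8r.v  v8, (a0)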
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 2 +
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 4 +-
.../rvv/fixed-vectors-interleaved-access.ll | 640 +++++++++---------
llvm/test/CodeGen/RISCV/rvv/remat.ll | 152 +++--
4 files changed, 428 insertions(+), 370 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a805c68e7795c6..13212c2aea5dde 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -172,6 +172,8 @@ bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
case RISCV::VMV_V_X:
case RISCV::VFMV_V_F:
case RISCV::VMV_V_I:
+ case RISCV::VMV_S_X:
+ case RISCV::VFMV_S_F:
case RISCV::VID_V:
if (MI.getOperand(1).isUndef() &&
/* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 2eceef5066f770..430e09fd834ba7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6764,7 +6764,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
Pseudo<(outs GPR:$rd), (ins VR:$rs2, ixlenimm:$sew), []>,
Sched<[WriteVMovXS, ReadVMovXS]>,
RISCVVPseudo;
- let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X,
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1,
Constraints = "$rd = $rs1" in
def PseudoVMV_S_X: Pseudo<(outs VR:$rd),
(ins VR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),
@@ -6787,7 +6787,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
(ins VR:$rs2, ixlenimm:$sew), []>,
Sched<[WriteVMovFS, ReadVMovFS]>,
RISCVVPseudo;
- let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F,
+ let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
Constraints = "$rd = $rs1" in
def "PseudoVFMV_S_" # f.FX :
Pseudo<(outs VR:$rd),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index bc3e135a588a6f..eff56e408d6d51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -159,296 +159,308 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 80
+; RV32-NEXT: li a3, 84
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 80 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 84 * vlenb
; RV32-NEXT: addi a3, a1, 256
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vle32.v v16, (a3)
+; RV32-NEXT: vle32.v v8, (a3)
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 6
+; RV32-NEXT: li a4, 76
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, a1, 128
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT: vslideup.vi v8, v16, 4
+; RV32-NEXT: vslideup.vi v4, v8, 4
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: li a5, 40
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v4, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 12
; RV32-NEXT: vmv.s.x v0, a4
-; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV32-NEXT: vslidedown.vi v16, v16, 16
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 56
+; RV32-NEXT: li a5, 24
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v3, v0
-; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t
+; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 16
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 44
+; RV32-NEXT: li a5, 48
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; RV32-NEXT: vslideup.vi v4, v8, 10, v0.t
; RV32-NEXT: lui a4, %hi(.LCPI6_0)
; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v8, (a4)
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v0, (a4)
; RV32-NEXT: lui a4, %hi(.LCPI6_1)
; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1)
; RV32-NEXT: lui a5, 1
; RV32-NEXT: vle16.v v8, (a4)
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a6, 24
+; RV32-NEXT: li a6, 56
; RV32-NEXT: mul a4, a4, a6
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 72
+; RV32-NEXT: li a4, 68
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vle32.v v24, (a3)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a1, a5, -64
-; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vmv.s.x v16, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 36
+; RV32-NEXT: li a3, 44
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v16, v8, v4
+; RV32-NEXT: vs1r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vrgatherei16.vv v16, v8, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 44
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 44
+; RV32-NEXT: li a3, 56
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: vmv.v.v v4, v16
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 44
+; RV32-NEXT: li a3, 36
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vslideup.vi v12, v8, 2
-; RV32-NEXT: vmv1r.v v8, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: li a3, 24
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vslideup.vi v12, v16, 8, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 56
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_2)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2)
; RV32-NEXT: lui a3, %hi(.LCPI6_3)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3)
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; RV32-NEXT: vle16.v v0, (a1)
-; RV32-NEXT: vle16.v v4, (a3)
+; RV32-NEXT: vle16.v v12, (a1)
+; RV32-NEXT: vle16.v v8, (a3)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 28
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_4)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v10, (a1)
+; RV32-NEXT: vle16.v v2, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v16, v0
+; RV32-NEXT: vrgatherei16.vv v24, v16, v12
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 36
+; RV32-NEXT: li a3, 44
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v24, v16, v4, v0.t
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 28
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v24, v8, v4, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 56
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v12, v24
+; RV32-NEXT: vmv.v.v v8, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 36
+; RV32-NEXT: li a3, 56
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v12, v24, v10
-; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vrgatherei16.vv v8, v24, v2
+; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v12, v24, 6, v0.t
+; RV32-NEXT: vslideup.vi v8, v24, 6, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a3, 44
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_5)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5)
; RV32-NEXT: lui a3, %hi(.LCPI6_6)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v12, (a1)
-; RV32-NEXT: vle16.v v8, (a3)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
-; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v24, (a1)
+; RV32-NEXT: vle16.v v4, (a3)
; RV32-NEXT: li a1, 960
-; RV32-NEXT: vmv.s.x v8, a1
+; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v24, v0, v12
-; RV32-NEXT: vmv1r.v v3, v8
-; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vrgatherei16.vv v8, v16, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
+; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 28
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_7)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7)
; RV32-NEXT: lui a3, %hi(.LCPI6_8)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_8)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v8, (a1)
+; RV32-NEXT: vle16.v v16, (a1)
; RV32-NEXT: lui a1, %hi(.LCPI6_9)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9)
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; RV32-NEXT: vle16.v v4, (a3)
-; RV32-NEXT: vle16.v v12, (a1)
+; RV32-NEXT: vle16.v v8, (a3)
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs4r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v12, v24, v8
+; RV32-NEXT: vrgatherei16.vv v12, v8, v16
+; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv4r.v v24, v16
; RV32-NEXT: vslideup.vi v12, v16, 4, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
+; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v8, v16, v4
-; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vrgatherei16.vv v8, v0, v20
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
+; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v20, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
@@ -461,48 +473,51 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a1, 15
; RV32-NEXT: vmv.s.x v3, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v8, v16, 6
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vslideup.vi v8, v24, 6
; RV32-NEXT: vmv1r.v v0, v3
-; RV32-NEXT: vrgatherei16.vv v8, v24, v12, v0.t
+; RV32-NEXT: vrgatherei16.vv v8, v16, v12, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: li a3, 76
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv4r.v v24, v16
; RV32-NEXT: lui a1, %hi(.LCPI6_11)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11)
; RV32-NEXT: lui a3, %hi(.LCPI6_12)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v24, (a1)
+; RV32-NEXT: vle16.v v28, (a1)
; RV32-NEXT: vle16.v v4, (a3)
; RV32-NEXT: li a1, 1008
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v24
+; RV32-NEXT: vrgatherei16.vv v8, v16, v28
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 60
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -511,14 +526,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a3, %hi(.LCPI6_14)
; RV32-NEXT: addi a3, a3, %lo(.LCPI6_14)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT: vle16.v v20, (a1)
+; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: lui a1, %hi(.LCPI6_15)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15)
; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; RV32-NEXT: vle16.v v24, (a3)
-; RV32-NEXT: vle16.v v8, (a1)
+; RV32-NEXT: vle16.v v28, (a3)
+; RV32-NEXT: vle16.v v12, (a1)
; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a3, 40
@@ -526,21 +541,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 56
-; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v16, v8, v20, v0.t
+; RV32-NEXT: vrgatherei16.vv v16, v24, v8, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a3, 44
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 28
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -548,20 +558,20 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v20, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a3, 68
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v8, v0, v24
+; RV32-NEXT: vrgatherei16.vv v8, v0, v28
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 48
+; RV32-NEXT: li a2, 60
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -570,56 +580,57 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t
; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
+; RV32-NEXT: vmv.v.v v28, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 12
+; RV32-NEXT: li a2, 76
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v24, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv.v.v v28, v0
+; RV32-NEXT: vmv.v.v v24, v0
; RV32-NEXT: vmv.v.v v16, v8
; RV32-NEXT: addi a1, a0, 320
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v16, (a1)
; RV32-NEXT: addi a1, a0, 256
-; RV32-NEXT: vse32.v v28, (a1)
-; RV32-NEXT: addi a1, a0, 192
; RV32-NEXT: vse32.v v24, (a1)
+; RV32-NEXT: addi a1, a0, 192
+; RV32-NEXT: vse32.v v28, (a1)
; RV32-NEXT: addi a1, a0, 128
; RV32-NEXT: vse32.v v20, (a1)
; RV32-NEXT: addi a1, a0, 64
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 36
+; RV32-NEXT: li a3, 56
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 44
+; RV32-NEXT: li a2, 36
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 80
+; RV32-NEXT: li a1, 84
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
@@ -630,15 +641,15 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 74
+; RV64-NEXT: li a3, 66
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xca, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 74 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 66 * vlenb
; RV64-NEXT: addi a2, a1, 256
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 25
+; RV64-NEXT: li a3, 21
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
@@ -646,76 +657,85 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi a2, a1, 128
; RV64-NEXT: vle64.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a3, a1, 6
-; RV64-NEXT: add a1, a3, a1
+; RV64-NEXT: li a3, 57
+; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vrgather.vi v12, v16, 4
; RV64-NEXT: li a1, 128
-; RV64-NEXT: vmv.s.x v8, a1
+; RV64-NEXT: vmv.s.x v0, a1
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v16, 8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 49
+; RV64-NEXT: li a3, 37
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v0, v8
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgather.vi v12, v16, 2, v0.t
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vid.v v10
; RV64-NEXT: li a1, 6
-; RV64-NEXT: vmul.vx v2, v10, a1
+; RV64-NEXT: vmul.vx v8, v10, a1
; RV64-NEXT: li a1, 56
-; RV64-NEXT: vle64.v v16, (a2)
+; RV64-NEXT: vle64.v v24, (a2)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 57
+; RV64-NEXT: li a3, 45
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vmv.s.x v7, a1
-; RV64-NEXT: vadd.vi v10, v2, -16
+; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv.s.x v10, a1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 6
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 53
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vrgatherei16.vv v16, v24, v2
-; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: vs1r.v v10, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vadd.vi v10, v8, -16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 57
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vrgatherei16.vv v16, v0, v8
+; RV64-NEXT: vmv2r.v v4, v8
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 53
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl1r.v v6, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv1r.v v0, v6
; RV64-NEXT: vrgatherei16.vv v16, v24, v10, v0.t
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v12, v16
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 21
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 25
+; RV64-NEXT: li a2, 21
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgather.vi v12, v16, 5
-; RV64-NEXT: vmv1r.v v0, v8
-; RV64-NEXT: vmv1r.v v6, v8
+; RV64-NEXT: vrgather.vi v12, v8, 5
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 49
+; RV64-NEXT: li a2, 37
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
@@ -723,19 +743,19 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vrgather.vi v12, v16, 3, v0.t
; RV64-NEXT: vmv.v.v v28, v12
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v24, v2, 1
-; RV64-NEXT: vadd.vi v26, v2, -15
+; RV64-NEXT: vadd.vi v24, v4, 1
+; RV64-NEXT: vadd.vi v26, v4, -15
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 6
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 57
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; RV64-NEXT: vrgatherei16.vv v16, v8, v24
-; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: vmv1r.v v0, v6
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 57
+; RV64-NEXT: li a2, 45
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
@@ -744,8 +764,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v28, v16
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 4
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 13
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
@@ -755,7 +775,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vmv.v.i v9, 6
; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 25
+; RV64-NEXT: li a2, 21
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
@@ -763,259 +783,253 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vrgatherei16.vv v12, v16, v9
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 45
+; RV64-NEXT: li a2, 53
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vrgatherei16.vv v12, v16, v10
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 41
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vmv4r.v v8, v16
; RV64-NEXT: vrgather.vi v12, v16, 2
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 37
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vrgather.vi v12, v16, 3
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 5
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 29
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a1, 24
-; RV64-NEXT: vmv.s.x v1, a1
-; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v24, v2, 2
-; RV64-NEXT: vadd.vi v4, v2, -14
+; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 6
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 21
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vrgatherei16.vv v8, v16, v24
-; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vadd.vi v16, v4, 2
+; RV64-NEXT: vadd.vi v2, v4, -14
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 57
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v24, v4, v0.t
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vrgatherei16.vv v8, v24, v16
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 25
+; RV64-NEXT: li a2, 45
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v2, v0.t
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v0, v6
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 49
+; RV64-NEXT: li a2, 37
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 45
+; RV64-NEXT: li a2, 53
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgather.vi v20, v16, 4, v0.t
+; RV64-NEXT: vrgather.vi v28, v24, 4, v0.t
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 45
+; RV64-NEXT: li a2, 53
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vmv2r.v v8, v4
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v4, v2, 3
-; RV64-NEXT: vadd.vi v8, v2, -13
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vadd.vi v4, v4, 3
+; RV64-NEXT: vadd.vi v6, v8, -13
+; RV64-NEXT: vmv2r.v v2, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 6
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 57
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vrgatherei16.vv v8, v16, v4
-; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: vrgatherei16.vv v8, v24, v4
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 21
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v6, v0.t
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 3
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 21
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv1r.v v0, v6
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 49
+; RV64-NEXT: li a2, 37
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 41
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgather.vi v8, v24, 5, v0.t
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 41
-; RV64-NEXT: mul a1, a1, a2
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vrgather.vi v4, v16, 5, v0.t
; RV64-NEXT: lui a1, 96
; RV64-NEXT: li a2, 192
-; RV64-NEXT: vmv.s.x v28, a2
+; RV64-NEXT: vmv.s.x v1, a2
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v8, a1
-; RV64-NEXT: vmv1r.v v0, v28
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 37
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgatherei16.vv v12, v24, v8, v0.t
+; RV64-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 37
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a1, 28
; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v30, v2, 4
-; RV64-NEXT: vadd.vi v6, v2, -12
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 6
+; RV64-NEXT: slli a2, a1, 3
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vrgatherei16.vv v16, v8, v30
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vadd.vi v22, v2, 4
+; RV64-NEXT: vadd.vi v20, v2, -12
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 57
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v16, v8, v6, v0.t
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vrgatherei16.vv v8, v24, v22
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 45
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v24, v20, v0.t
; RV64-NEXT: lui a1, 112
; RV64-NEXT: addi a1, a1, 1
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
-; RV64-NEXT: vmv1r.v v0, v28
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 5
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 29
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vrgatherei16.vv v16, v24, v12, v0.t
+; RV64-NEXT: vrgatherei16.vv v20, v16, v12, v0.t
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 5
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 29
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 45
+; RV64-NEXT: li a2, 53
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 25
-; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v16, v24
-; RV64-NEXT: vmv2r.v v8, v2
+; RV64-NEXT: vmv.v.v v12, v24
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 53
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vadd.vi v12, v2, 5
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 6
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 57
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v24, v0, v12
+; RV64-NEXT: vrgatherei16.vv v24, v16, v12
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v2, v8, -11
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vadd.vi v12, v2, -11
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 57
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vrgatherei16.vv v24, v8, v2, v0.t
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 41
+; RV64-NEXT: li a2, 45
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vrgatherei16.vv v24, v16, v12, v0.t
+; RV64-NEXT: vmv4r.v v12, v4
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 3
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 21
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT: vmv.v.v v12, v0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 37
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 5
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv.v.v v20, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmv.v.v v20, v0
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 5
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 29
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
@@ -1028,24 +1042,30 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi a1, a0, 192
; RV64-NEXT: vse64.v v12, (a1)
; RV64-NEXT: addi a1, a0, 128
-; RV64-NEXT: vse64.v v16, (a1)
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 64
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 4
-; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: li a3, 13
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 21
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 74
+; RV64-NEXT: li a1, 66
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 638f0623d382e3..4f58ccb5188d31 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -379,35 +379,53 @@ define void @vfmv.v.f(ptr %p, double %x) {
}
define void @vmv.s.x(ptr %p, i64 %x) {
-; CHECK-LABEL: vmv.s.x:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, a1
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vs8r.v v0, (a0)
-; CHECK-NEXT: vs8r.v v24, (a0)
-; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: sd a1, 0(a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; POSTRA-LABEL: vmv.s.x:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; POSTRA-NEXT: vmv.s.x v8, a1
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vl8re64.v v16, (a0)
+; POSTRA-NEXT: vl8re64.v v24, (a0)
+; POSTRA-NEXT: vl8re64.v v0, (a0)
+; POSTRA-NEXT: vl8re64.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v0, (a0)
+; POSTRA-NEXT: vs8r.v v24, (a0)
+; POSTRA-NEXT: vs8r.v v16, (a0)
+; POSTRA-NEXT: vmv.s.x v8, a1
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: sd a1, 0(a0)
+; POSTRA-NEXT: ret
+;
+; PRERA-LABEL: vmv.s.x:
+; PRERA: # %bb.0:
+; PRERA-NEXT: addi sp, sp, -16
+; PRERA-NEXT: .cfi_def_cfa_offset 16
+; PRERA-NEXT: csrr a2, vlenb
+; PRERA-NEXT: slli a2, a2, 3
+; PRERA-NEXT: sub sp, sp, a2
+; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; PRERA-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; PRERA-NEXT: vmv.s.x v8, a1
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: addi a2, sp, 16
+; PRERA-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; PRERA-NEXT: vl8re64.v v24, (a0)
+; PRERA-NEXT: vl8re64.v v0, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v0, (a0)
+; PRERA-NEXT: vs8r.v v24, (a0)
+; PRERA-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: sd a1, 0(a0)
+; PRERA-NEXT: csrr a0, vlenb
+; PRERA-NEXT: slli a0, a0, 3
+; PRERA-NEXT: add sp, sp, a0
+; PRERA-NEXT: addi sp, sp, 16
+; PRERA-NEXT: ret
%vmv.s.x = call <vscale x 8 x i64> @llvm.riscv.vmv.s.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
store volatile <vscale x 8 x i64> %vmv.s.x, ptr %p
@@ -426,35 +444,53 @@ define void @vmv.s.x(ptr %p, i64 %x) {
}
define void @vfmv.s.f(ptr %p, double %x) {
-; CHECK-LABEL: vfmv.s.f:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vs8r.v v0, (a0)
-; CHECK-NEXT: vs8r.v v24, (a0)
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: fsd fa0, 0(a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; POSTRA-LABEL: vfmv.s.f:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; POSTRA-NEXT: vfmv.s.f v8, fa0
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vl8re64.v v16, (a0)
+; POSTRA-NEXT: vl8re64.v v24, (a0)
+; POSTRA-NEXT: vl8re64.v v0, (a0)
+; POSTRA-NEXT: vl8re64.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v0, (a0)
+; POSTRA-NEXT: vs8r.v v24, (a0)
+; POSTRA-NEXT: vs8r.v v16, (a0)
+; POSTRA-NEXT: vfmv.s.f v8, fa0
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: fsd fa0, 0(a0)
+; POSTRA-NEXT: ret
+;
+; PRERA-LABEL: vfmv.s.f:
+; PRERA: # %bb.0:
+; PRERA-NEXT: addi sp, sp, -16
+; PRERA-NEXT: .cfi_def_cfa_offset 16
+; PRERA-NEXT: csrr a1, vlenb
+; PRERA-NEXT: slli a1, a1, 3
+; PRERA-NEXT: sub sp, sp, a1
+; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; PRERA-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; PRERA-NEXT: vfmv.s.f v8, fa0
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: addi a1, sp, 16
+; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; PRERA-NEXT: vl8re64.v v24, (a0)
+; PRERA-NEXT: vl8re64.v v0, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v0, (a0)
+; PRERA-NEXT: vs8r.v v24, (a0)
+; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: fsd fa0, 0(a0)
+; PRERA-NEXT: csrr a0, vlenb
+; PRERA-NEXT: slli a0, a0, 3
+; PRERA-NEXT: add sp, sp, a0
+; PRERA-NEXT: addi sp, sp, 16
+; PRERA-NEXT: ret
%vfmv.s.f = call <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
store volatile <vscale x 8 x double> %vfmv.s.f, ptr %p