[llvm] [RISCV] Move VMV0 elimination past machine SSA opts (PR #126850)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 20:15:56 PST 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/126850
From 2c4144533946f0573f8de6411c718f41b2310fe1 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 12 Feb 2025 12:20:34 +0800
Subject: [PATCH] [RISCV] Move VMV0 elimination past machine SSA opts
This is the follow-up to #125026 that keeps mask operands in virtual register form for as long as possible throughout the backend.
The test diffs in this patch come from MachineCSE, MachineSink, and RISCVVLOptimizer now kicking in.
The invariant that the mask COPY never has a subreg no longer holds after MachineCSE (it coalesces some copies), so the assert has been relaxed into a check.
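For readers skimming the commit message, the relaxed peek-through-copy logic (shown in full in the RISCVVMV0Elimination.cpp hunk below) now only folds the copy when its source is a plain virtual register with no subregister index, roughly:

    // Only peek through the COPY if its source is a virtual register with no
    // subregister; after MachineCSE the source may carry a subreg, in which
    // case the original Src is kept and copied into V0 as-is.
    if (MachineInstr *SrcMI = MRI.getVRegDef(Src);
        SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual() &&
        SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister)
      Src = SrcMI->getOperand(1).getReg();

(Src and MRI are the pass's existing locals; this is just the new condition restated outside the diff context, not additional functionality.)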
---
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 8 +-
.../lib/Target/RISCV/RISCVVMV0Elimination.cpp | 5 +-
llvm/test/CodeGen/RISCV/O0-pipeline.ll | 2 +-
llvm/test/CodeGen/RISCV/O3-pipeline.ll | 2 +-
llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 20 +-
llvm/test/CodeGen/RISCV/rvv/commutable.ll | 69 +-
llvm/test/CodeGen/RISCV/rvv/copyprop.mir | 8 +-
.../RISCV/rvv/fixed-vectors-cttz-vp.ll | 126 +--
.../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 24 -
.../RISCV/rvv/fixed-vectors-trunc-vp.ll | 953 ++++++++++++------
.../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 34 +-
.../RISCV/rvv/fixed-vectors-vselect.ll | 36 +-
llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 20 +-
llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 6 +-
llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 6 +-
llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 46 +-
llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 47 +-
llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 20 +-
llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 20 +-
llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 20 +-
llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 24 +-
.../RISCV/rvv/vector-extract-last-active.ll | 96 +-
.../RISCV/rvv/vector-reassociations.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 178 ++--
llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 109 +-
llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll | 16 +-
.../RISCV/rvv/vp-vector-interleaved-access.ll | 22 +-
.../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vpload.ll | 18 +-
.../CodeGen/RISCV/rvv/vreductions-fp-vp.ll | 68 +-
llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 83 +-
36 files changed, 1217 insertions(+), 949 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 89e017807363b..52bb10f9ba19b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -589,8 +589,6 @@ void RISCVPassConfig::addPreEmitPass2() {
void RISCVPassConfig::addMachineSSAOptimization() {
addPass(createRISCVVectorPeepholePass());
- // TODO: Move this to pre regalloc
- addPass(createRISCVVMV0EliminationPass());
addPass(createRISCVFoldMemOffsetPass());
TargetPassConfig::addMachineSSAOptimization();
@@ -604,10 +602,6 @@ void RISCVPassConfig::addMachineSSAOptimization() {
}
void RISCVPassConfig::addPreRegAlloc() {
- // TODO: Move this as late as possible before regalloc
- if (TM->getOptLevel() == CodeGenOptLevel::None)
- addPass(createRISCVVMV0EliminationPass());
-
addPass(createRISCVPreRAExpandPseudoPass());
if (TM->getOptLevel() != CodeGenOptLevel::None) {
addPass(createRISCVMergeBaseOffsetOptPass());
@@ -621,6 +615,8 @@ void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
addPass(&MachinePipelinerID);
+
+ addPass(createRISCVVMV0EliminationPass());
}
void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
index ccc86da340440..9737474a18f63 100644
--- a/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp
@@ -131,10 +131,9 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
// Peek through a single copy to match what isel does.
if (MachineInstr *SrcMI = MRI.getVRegDef(Src);
- SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual()) {
- assert(SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister);
+ SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual() &&
+ SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister)
Src = SrcMI->getOperand(1).getReg();
- }
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0)
.addReg(Src);
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index a50c303819f23..f93cb65897210 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -39,11 +39,11 @@
; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
-; CHECK-NEXT: RISC-V VMV0 Elimination
; CHECK-NEXT: RISC-V Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: RISC-V Landing Pad Setup
+; CHECK-NEXT: RISC-V VMV0 Elimination
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 194223eee69eb..b67fbe1b5d3cd 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -97,7 +97,6 @@
; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: RISC-V Vector Peephole Optimization
-; CHECK-NEXT: RISC-V VMV0 Elimination
; CHECK-NEXT: RISC-V Fold Memory Offset
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
@@ -129,6 +128,7 @@
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: RISC-V Landing Pad Setup
+; CHECK-NEXT: RISC-V VMV0 Elimination
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index 1b9c78a20ec3b..039266b169ab2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -1515,40 +1515,36 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, <
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a2, 3
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/commutable.ll b/llvm/test/CodeGen/RISCV/rvv/commutable.ll
index e26c467f025bd..5f35626120178 100644
--- a/llvm/test/CodeGen/RISCV/rvv/commutable.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/commutable.ll
@@ -26,10 +26,9 @@ define <vscale x 1 x i64> @commutable_vadd_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vadd_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v10, v8, v9, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -59,10 +58,9 @@ define <vscale x 1 x i64> @commutable_vand_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vand_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vand.vv v10, v8, v9, v0.t
; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vand.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vand.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -92,10 +90,9 @@ define <vscale x 1 x i64> @commutable_vor_vv_masked(<vscale x 1 x i64> %0, <vsca
; CHECK-LABEL: commutable_vor_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vor.vv v10, v8, v9, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vor.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vor.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -125,10 +122,9 @@ define <vscale x 1 x i64> @commutable_vxor_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vxor_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vxor.vv v10, v8, v9, v0.t
; CHECK-NEXT: vxor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vxor.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vxor.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -158,10 +154,9 @@ define <vscale x 1 x i1> @commutable_vmseq_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vmseq_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmseq.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmseq.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmxor.mm v0, v10, v8
+; CHECK-NEXT: vmxor.mm v0, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64(<vscale x 1 x i1> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2)
%b = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64(<vscale x 1 x i1> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2)
@@ -191,10 +186,9 @@ define <vscale x 1 x i1> @commutable_vmsne_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vmsne_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmsne.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmsne.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmxor.mm v0, v10, v8
+; CHECK-NEXT: vmxor.mm v0, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i64(<vscale x 1 x i1> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2)
%b = call <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i64(<vscale x 1 x i1> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2)
@@ -224,10 +218,9 @@ define <vscale x 1 x i64> @commutable_vmin_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vmin_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmin.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmin.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vmin.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vmin.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -257,10 +250,9 @@ define <vscale x 1 x i64> @commutable_vminu_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK-LABEL: commutable_vminu_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vminu.vv v10, v8, v9, v0.t
; CHECK-NEXT: vminu.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vminu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vminu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -290,10 +282,9 @@ define <vscale x 1 x i64> @commutable_vmax_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vmax_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmax.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmax.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vmax.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vmax.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -323,10 +314,9 @@ define <vscale x 1 x i64> @commutable_vmaxu_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK-LABEL: commutable_vmaxu_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmaxu.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmaxu.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vmaxu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -356,10 +346,9 @@ define <vscale x 1 x i64> @commutable_vmul_vv_masked(<vscale x 1 x i64> %0, <vsc
; CHECK-LABEL: commutable_vmul_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmul.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmul.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vmul.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vmul.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -389,10 +378,9 @@ define <vscale x 1 x i64> @commutable_vmulh_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK-LABEL: commutable_vmulh_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmulh.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmulh.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -422,10 +410,9 @@ define <vscale x 1 x i64> @commutable_vmulhu_vv_masked(<vscale x 1 x i64> %0, <v
; CHECK-LABEL: commutable_vmulhu_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vmulhu.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmulhu.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -456,9 +443,8 @@ define <vscale x 1 x i64> @commutable_vwadd_vv_masked(<vscale x 1 x i32> %0, <vs
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vwadd.vv v10, v8, v9, v0.t
-; CHECK-NEXT: vwadd.vv v11, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v11
+; CHECK-NEXT: vadd.vv v8, v10, v10
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vwadd.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %1, <vscale x 1 x i32> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -489,9 +475,8 @@ define <vscale x 1 x i64> @commutable_vwaddu_vv_masked(<vscale x 1 x i32> %0, <v
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9, v0.t
-; CHECK-NEXT: vwaddu.vv v11, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v11
+; CHECK-NEXT: vadd.vv v8, v10, v10
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vwaddu.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %1, <vscale x 1 x i32> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -522,9 +507,8 @@ define <vscale x 1 x i64> @commutable_vwmul_vv_masked(<vscale x 1 x i32> %0, <vs
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vwmul.vv v10, v8, v9, v0.t
-; CHECK-NEXT: vwmul.vv v11, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v11
+; CHECK-NEXT: vadd.vv v8, v10, v10
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vwmul.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %1, <vscale x 1 x i32> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -555,9 +539,8 @@ define <vscale x 1 x i64> @commutable_vwmulu_vv_masked(<vscale x 1 x i32> %0, <v
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vwmulu.vv v10, v8, v9, v0.t
-; CHECK-NEXT: vwmulu.vv v11, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v11
+; CHECK-NEXT: vadd.vv v8, v10, v10
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vwmulu.mask.nxv1i64.nxv1i32.nxv1i32(<vscale x 1 x i64> undef, <vscale x 1 x i32> %1, <vscale x 1 x i32> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -637,10 +620,9 @@ define <vscale x 1 x i64> @commutable_vadc_vv(<vscale x 1 x i64> %0, <vscale x 1
; CHECK-LABEL: commutable_vadc_vv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vadc.vvm v10, v8, v9, v0
; CHECK-NEXT: vadc.vvm v8, v8, v9, v0
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vadc.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2)
@@ -671,10 +653,9 @@ define <vscale x 1 x i64> @commutable_vsadd_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK-LABEL: commutable_vsadd_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vsadd.vv v10, v8, v9, v0.t
; CHECK-NEXT: vsadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vsadd.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -704,10 +685,9 @@ define <vscale x 1 x i64> @commutable_vsaddu_vv_masked(<vscale x 1 x i64> %0, <v
; CHECK-LABEL: commutable_vsaddu_vv_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vsaddu.vv v10, v8, v9, v0.t
; CHECK-NEXT: vsaddu.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vsaddu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen %2, iXLen 1)
@@ -739,10 +719,9 @@ define <vscale x 1 x i64> @commutable_vaadd_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vaadd.vv v10, v8, v9, v0.t
; CHECK-NEXT: vaadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen 0, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vaadd.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen 0, iXLen %2, iXLen 1)
@@ -774,10 +753,9 @@ define <vscale x 1 x i64> @commutable_vaaddu_vv_masked(<vscale x 1 x i64> %0, <v
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vaaddu.vv v10, v8, v9, v0.t
; CHECK-NEXT: vaaddu.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen 0, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vaaddu.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen 0, iXLen %2, iXLen 1)
@@ -809,10 +787,9 @@ define <vscale x 1 x i64> @commutable_vsmul_vv_masked(<vscale x 1 x i64> %0, <vs
; CHECK: # %bb.0:
; CHECK-NEXT: csrwi vxrm, 0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
-; CHECK-NEXT: vsmul.vv v10, v8, v9, v0.t
; CHECK-NEXT: vsmul.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vadd.vv v8, v10, v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %mask, iXLen 0, iXLen %2, iXLen 1)
%b = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> %1, <vscale x 1 x i64> %0, <vscale x 1 x i1> %mask, iXLen 0, iXLen %2, iXLen 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
index 0b905b57f92b8..be73d4808937a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir
@@ -6,9 +6,6 @@
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
- ; CHECK-NEXT: vmsne.vi v0, v8, 0
- ; CHECK-NEXT: vsll.vi v8, v8, 5
- ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
; CHECK-NEXT: sf.vc.v.x 3, 31, v9, a1
; CHECK-NEXT: bgeu a0, zero, .LBB0_3
; CHECK-NEXT: # %bb.1: # %entry
@@ -22,7 +19,10 @@
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: bgeu a0, a2, .LBB0_2
; CHECK-NEXT: .LBB0_4: # %entry
- ; CHECK-NEXT: vse64.v v8, (a1)
+ ; CHECK-NEXT: vmsne.vi v0, v8, 0
+ ; CHECK-NEXT: vsll.vi v8, v8, 5
+ ; CHECK-NEXT: vmerge.vim v9, v8, -1, v0
+ ; CHECK-NEXT: vse64.v v9, (a1)
; CHECK-NEXT: ret
entry:
ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 5f275da1740cb..cd4b19f11d160 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1735,8 +1735,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 24
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -1771,7 +1770,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 24
+; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
@@ -1786,12 +1786,13 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: addi a3, sp, 32
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -1805,21 +1806,24 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
@@ -1837,7 +1841,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a4), zero
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
@@ -1851,7 +1856,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 5
+; RV32-NEXT: li a3, 24
+; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
@@ -1869,8 +1875,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 24
-; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -1886,41 +1891,28 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v24, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v24, v0.t
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
+; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
@@ -1932,7 +1924,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -3928,8 +3921,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 24
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -3964,7 +3956,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: li a5, 24
+; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
@@ -3979,12 +3972,13 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: addi a3, sp, 32
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 4
+; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -3998,21 +3992,24 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
@@ -4030,7 +4027,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a4), zero
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 24
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
@@ -4044,7 +4042,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 5
+; RV32-NEXT: li a3, 24
+; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
@@ -4062,8 +4061,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 24
-; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
@@ -4079,41 +4077,28 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
-; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v24, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
-; RV32-NEXT: mul a0, a0, a1
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 40
-; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v24, v0.t
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 24
+; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
@@ -4125,7 +4110,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index bef29dfecef4c..c885b3c03270c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -3902,12 +3902,10 @@ define void @trunc_v6bf16(ptr %x) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
@@ -3969,10 +3967,8 @@ define void @trunc_v6f16(ptr %x) {
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: lui a1, %hi(.LCPI172_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
@@ -3986,12 +3982,10 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
@@ -4082,13 +4076,11 @@ define void @ceil_v6bf16(ptr %x) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 3
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
@@ -4155,11 +4147,9 @@ define void @ceil_v6f16(ptr %x) {
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: lui a1, %hi(.LCPI178_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: fsrmi a1, 3
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -4174,13 +4164,11 @@ define void @ceil_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 3
-; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
@@ -4276,13 +4264,11 @@ define void @floor_v6bf16(ptr %x) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 2
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
@@ -4349,11 +4335,9 @@ define void @floor_v6f16(ptr %x) {
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: lui a1, %hi(.LCPI184_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: fsrmi a1, 2
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -4368,13 +4352,11 @@ define void @floor_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 2
-; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
@@ -4470,13 +4452,11 @@ define void @round_v6bf16(ptr %x) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a1, 307200
; CHECK-NEXT: fmv.w.x fa5, a1
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v10
; CHECK-NEXT: vmflt.vf v0, v8, fa5
; CHECK-NEXT: fsrmi a1, 4
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
@@ -4543,11 +4523,9 @@ define void @round_v6f16(ptr %x) {
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: lui a1, %hi(.LCPI190_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1)
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfabs.v v9, v8
; ZVFH-NEXT: vmflt.vf v0, v9, fa5
; ZVFH-NEXT: fsrmi a1, 4
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT: fsrm a1
; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
@@ -4562,13 +4540,11 @@ define void @round_v6f16(ptr %x) {
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: lui a1, 307200
; ZVFHMIN-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfabs.v v8, v10
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT: fsrmi a1, 4
-; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
; ZVFHMIN-NEXT: fsrm a1
; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index a91dee1cb245f..037ed257f4a89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
declare <2 x i7> @llvm.vp.trunc.v2i7.v2i16(<2 x i16>, <2 x i1>, i32)
@@ -222,316 +222,645 @@ define <2 x i32> @vtrunc_v2i32_v2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
declare <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64>, <128 x i1>, i32)
define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) {
-; CHECK-LABEL: vtrunc_v128i32_v128i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 72
-; CHECK-NEXT: mul a2, a2, a3
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 72 * vlenb
-; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a2, a2, a3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 5
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT: vslidedown.vi v6, v0, 8
-; CHECK-NEXT: addi a2, a1, 512
-; CHECK-NEXT: addi a3, a1, 640
-; CHECK-NEXT: addi a4, a7, -64
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v27, v6, 4
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a3)
-; CHECK-NEXT: sltu a3, a7, a4
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v27, 2
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a4, a3, a4
-; CHECK-NEXT: addi a3, a4, -32
-; CHECK-NEXT: sltu a5, a4, a3
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a3, a5, a3
-; CHECK-NEXT: addi a5, a3, -16
-; CHECK-NEXT: sltu a6, a3, a5
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a5, a6, a5
-; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 4
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a2)
-; CHECK-NEXT: addi a5, a1, 128
-; CHECK-NEXT: li a2, 16
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v26, v7, 4
-; CHECK-NEXT: bltu a3, a2, .LBB16_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 16
-; CHECK-NEXT: .LBB16_2:
-; CHECK-NEXT: vmv1r.v v0, v27
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a5)
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 56
-; CHECK-NEXT: mul a5, a5, a6
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v27, v26, 2
-; CHECK-NEXT: li a5, 64
-; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a3, a3, 6
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: mv a6, a7
-; CHECK-NEXT: bltu a7, a5, .LBB16_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a6, 64
-; CHECK-NEXT: .LBB16_4:
-; CHECK-NEXT: vmv1r.v v0, v27
-; CHECK-NEXT: addi a5, a1, 384
-; CHECK-NEXT: li a3, 32
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a1)
-; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: li t1, 48
-; CHECK-NEXT: mul t0, t0, t1
-; CHECK-NEXT: add t0, sp, t0
-; CHECK-NEXT: addi t0, t0, 16
-; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
-; CHECK-NEXT: addi t0, a6, -32
-; CHECK-NEXT: sltu a6, a6, t0
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a6, a6, t0
-; CHECK-NEXT: addi t0, a6, -16
-; CHECK-NEXT: sltu t1, a6, t0
-; CHECK-NEXT: addi t1, t1, -1
-; CHECK-NEXT: and t0, t1, t0
-; CHECK-NEXT: csrr t1, vlenb
-; CHECK-NEXT: li t2, 56
-; CHECK-NEXT: mul t1, t1, t2
-; CHECK-NEXT: add t1, sp, t1
-; CHECK-NEXT: addi t1, t1, 16
-; CHECK-NEXT: vl8r.v v16, (t1) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: slli t0, t0, 3
-; CHECK-NEXT: add t0, sp, t0
-; CHECK-NEXT: addi t0, t0, 16
-; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a6, a2, .LBB16_6
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: li a6, 16
-; CHECK-NEXT: .LBB16_6:
-; CHECK-NEXT: vmv1r.v v0, v26
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v8, (a5)
-; CHECK-NEXT: addi a5, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, a1, 256
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v26, v6, 2
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li t0, 48
-; CHECK-NEXT: mul a5, a5, t0
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 56
-; CHECK-NEXT: mul a5, a5, a6
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: bltu a4, a3, .LBB16_8
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: li a5, 32
-; CHECK-NEXT: .LBB16_8:
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a1)
-; CHECK-NEXT: addi a1, a5, -16
-; CHECK-NEXT: sltu a5, a5, a1
-; CHECK-NEXT: addi a5, a5, -1
-; CHECK-NEXT: and a1, a5, a1
-; CHECK-NEXT: vmv1r.v v0, v26
-; CHECK-NEXT: addi a5, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a5, 40
-; CHECK-NEXT: mul a1, a1, a5
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a2, .LBB16_10
-; CHECK-NEXT: # %bb.9:
-; CHECK-NEXT: li a4, 16
-; CHECK-NEXT: .LBB16_10:
-; CHECK-NEXT: vmv1r.v v0, v6
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v25, v7, 2
-; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 48
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: mv a1, a7
-; CHECK-NEXT: bltu a7, a3, .LBB16_12
-; CHECK-NEXT: # %bb.11:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: .LBB16_12:
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 4
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vmv4r.v v24, v16
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 3
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 40
-; CHECK-NEXT: mul a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 40
-; CHECK-NEXT: mul a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 6
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vslideup.vi v16, v24, 16
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 6
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a4, a1, -16
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 56
-; CHECK-NEXT: mul a5, a5, a6
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v16, v8, 16
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 56
-; CHECK-NEXT: mul a5, a5, a6
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 48
-; CHECK-NEXT: mul a5, a5, a6
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 40
-; CHECK-NEXT: mul a5, a5, a6
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v8, v16, 16
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 48
-; CHECK-NEXT: mul a5, a5, a6
-; CHECK-NEXT: add a5, sp, a5
-; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
-; CHECK-NEXT: sltu a1, a1, a4
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: and a1, a1, a4
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 24
-; CHECK-NEXT: mul a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
-; CHECK-NEXT: bltu a7, a2, .LBB16_14
-; CHECK-NEXT: # %bb.13:
-; CHECK-NEXT: li a7, 16
-; CHECK-NEXT: .LBB16_14:
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vslideup.vi v24, v8, 16
-; CHECK-NEXT: vse32.v v24, (a0)
-; CHECK-NEXT: addi a1, a0, 256
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 48
-; CHECK-NEXT: mul a2, a2, a3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: vse32.v v8, (a1)
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 56
-; CHECK-NEXT: mul a2, a2, a3
-; CHECK-NEXT: add a2, sp, a2
-; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; CHECK-NEXT: vse32.v v8, (a1)
-; CHECK-NEXT: addi a0, a0, 384
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 6
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 72
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; RV32-LABEL: vtrunc_v128i32_v128i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset s0, -4
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 72
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 72 * vlenb
+; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV32-NEXT: vmv1r.v v7, v0
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 5
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 40
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: vslidedown.vi v5, v0, 8
+; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v4, v0, 4
+; RV32-NEXT: addi a2, a7, -64
+; RV32-NEXT: vslidedown.vi v3, v5, 4
+; RV32-NEXT: sltu a3, a7, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a4, a3, a2
+; RV32-NEXT: addi a2, a4, -32
+; RV32-NEXT: sltu a3, a4, a2
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a3, a3, a2
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: addi t0, a3, -16
+; RV32-NEXT: mv a5, a3
+; RV32-NEXT: bltu a3, a2, .LBB16_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a5, 16
+; RV32-NEXT: .LBB16_2:
+; RV32-NEXT: li t2, 64
+; RV32-NEXT: sltu t1, a3, t0
+; RV32-NEXT: mv a6, a7
+; RV32-NEXT: bltu a7, t2, .LBB16_4
+; RV32-NEXT: # %bb.3:
+; RV32-NEXT: li a6, 64
+; RV32-NEXT: .LBB16_4:
+; RV32-NEXT: addi t3, a1, 128
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v6, v4, 2
+; RV32-NEXT: addi s0, a1, 512
+; RV32-NEXT: addi t6, a1, 640
+; RV32-NEXT: vslidedown.vi v0, v3, 2
+; RV32-NEXT: addi t1, t1, -1
+; RV32-NEXT: addi t2, a1, 384
+; RV32-NEXT: vslidedown.vi v2, v5, 2
+; RV32-NEXT: li a3, 32
+; RV32-NEXT: addi t4, a6, -32
+; RV32-NEXT: sltu a6, a6, t4
+; RV32-NEXT: addi a6, a6, -1
+; RV32-NEXT: and a6, a6, t4
+; RV32-NEXT: addi t4, a6, -16
+; RV32-NEXT: sltu t5, a6, t4
+; RV32-NEXT: addi t5, t5, -1
+; RV32-NEXT: bltu a6, a2, .LBB16_6
+; RV32-NEXT: # %bb.5:
+; RV32-NEXT: li a6, 16
+; RV32-NEXT: .LBB16_6:
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vle64.v v8, (s0)
+; RV32-NEXT: csrr s0, vlenb
+; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
+; RV32-NEXT: li a0, 56
+; RV32-NEXT: mul s0, s0, a0
+; RV32-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT: add s0, sp, s0
+; RV32-NEXT: addi s0, s0, 16
+; RV32-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill
+; RV32-NEXT: vle64.v v16, (t6)
+; RV32-NEXT: vle64.v v8, (t3)
+; RV32-NEXT: csrr t3, vlenb
+; RV32-NEXT: slli t3, t3, 3
+; RV32-NEXT: add t3, sp, t3
+; RV32-NEXT: addi t3, t3, 16
+; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill
+; RV32-NEXT: vle64.v v8, (a1)
+; RV32-NEXT: csrr t3, vlenb
+; RV32-NEXT: li t6, 48
+; RV32-NEXT: mul t3, t3, t6
+; RV32-NEXT: add t3, sp, t3
+; RV32-NEXT: addi t3, t3, 16
+; RV32-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill
+; RV32-NEXT: vle64.v v8, (t2)
+; RV32-NEXT: csrr t2, vlenb
+; RV32-NEXT: slli t2, t2, 4
+; RV32-NEXT: add t2, sp, t2
+; RV32-NEXT: addi t2, t2, 16
+; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill
+; RV32-NEXT: and t2, t1, t0
+; RV32-NEXT: and t1, t5, t4
+; RV32-NEXT: addi a1, a1, 256
+; RV32-NEXT: mv t0, a4
+; RV32-NEXT: bltu a4, a3, .LBB16_8
+; RV32-NEXT: # %bb.7:
+; RV32-NEXT: li t0, 32
+; RV32-NEXT: .LBB16_8:
+; RV32-NEXT: vsetvli zero, t2, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; RV32-NEXT: addi t2, sp, 16
+; RV32-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill
+; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: csrr t2, vlenb
+; RV32-NEXT: li t3, 56
+; RV32-NEXT: mul t2, t2, t3
+; RV32-NEXT: add t2, sp, t2
+; RV32-NEXT: addi t2, t2, 16
+; RV32-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: slli a5, a5, 6
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vmv1r.v v0, v6
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: slli a5, a5, 3
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, t1, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: li t1, 24
+; RV32-NEXT: mul a5, a5, t1
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: addi a5, t0, -16
+; RV32-NEXT: sltu t0, t0, a5
+; RV32-NEXT: addi t0, t0, -1
+; RV32-NEXT: and a5, t0, a5
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vle64.v v16, (a1)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vi v6, v7, 2
+; RV32-NEXT: vmv1r.v v0, v4
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li t0, 48
+; RV32-NEXT: mul a1, a1, t0
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a6, 56
+; RV32-NEXT: mul a1, a1, a6
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv1r.v v0, v2
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a5, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v24, v16, 0, v0.t
+; RV32-NEXT: bltu a4, a2, .LBB16_10
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: li a4, 16
+; RV32-NEXT: .LBB16_10:
+; RV32-NEXT: vmv1r.v v0, v5
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a4, 48
+; RV32-NEXT: mul a1, a1, a4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: mv a1, a7
+; RV32-NEXT: bltu a7, a3, .LBB16_12
+; RV32-NEXT: # %bb.11:
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: .LBB16_12:
+; RV32-NEXT: vmv1r.v v0, v6
+; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: li a5, 24
+; RV32-NEXT: mul a4, a4, a5
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: li a5, 24
+; RV32-NEXT: mul a4, a4, a5
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 6
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vslideup.vi v24, v8, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 6
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: addi a4, a1, -16
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: li a6, 56
+; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; RV32-NEXT: vslideup.vi v8, v16, 16
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: li a6, 56
+; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: li a6, 48
+; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: li a6, 24
+; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
+; RV32-NEXT: vslideup.vi v8, v16, 16
+; RV32-NEXT: csrr a5, vlenb
+; RV32-NEXT: li a6, 48
+; RV32-NEXT: mul a5, a5, a6
+; RV32-NEXT: add a5, sp, a5
+; RV32-NEXT: addi a5, a5, 16
+; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: sltu a1, a1, a4
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a1, a1, a4
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 5
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; RV32-NEXT: bltu a7, a2, .LBB16_14
+; RV32-NEXT: # %bb.13:
+; RV32-NEXT: li a7, 16
+; RV32-NEXT: .LBB16_14:
+; RV32-NEXT: vmv1r.v v0, v7
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetvli zero, a7, e32, m4, ta, ma
+; RV32-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV32-NEXT: vslideup.vi v16, v8, 16
+; RV32-NEXT: vse32.v v16, (a0)
+; RV32-NEXT: addi a1, a0, 256
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 48
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vse32.v v8, (a1)
+; RV32-NEXT: addi a1, a0, 128
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 56
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vse32.v v8, (a1)
+; RV32-NEXT: addi a0, a0, 384
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 72
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa sp, 32
+; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: .cfi_restore s0
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: .cfi_def_cfa_offset 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vtrunc_v128i32_v128i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -48
+; RV64-NEXT: .cfi_def_cfa_offset 48
+; RV64-NEXT: sd s0, 40(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset s0, -8
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 72
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 72 * vlenb
+; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV64-NEXT: vmv1r.v v7, v0
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 5
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 32
+; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 40
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 32
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vslidedown.vi v5, v0, 8
+; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64-NEXT: vslidedown.vi v4, v0, 4
+; RV64-NEXT: addi a2, a7, -64
+; RV64-NEXT: vslidedown.vi v3, v5, 4
+; RV64-NEXT: sltu a3, a7, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a4, a3, a2
+; RV64-NEXT: addi a2, a4, -32
+; RV64-NEXT: sltu a3, a4, a2
+; RV64-NEXT: addi a3, a3, -1
+; RV64-NEXT: and a3, a3, a2
+; RV64-NEXT: li a2, 16
+; RV64-NEXT: addi t0, a3, -16
+; RV64-NEXT: mv a5, a3
+; RV64-NEXT: bltu a3, a2, .LBB16_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: li a5, 16
+; RV64-NEXT: .LBB16_2:
+; RV64-NEXT: li t2, 64
+; RV64-NEXT: sltu t1, a3, t0
+; RV64-NEXT: mv a6, a7
+; RV64-NEXT: bltu a7, t2, .LBB16_4
+; RV64-NEXT: # %bb.3:
+; RV64-NEXT: li a6, 64
+; RV64-NEXT: .LBB16_4:
+; RV64-NEXT: addi t3, a1, 128
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v6, v4, 2
+; RV64-NEXT: addi s0, a1, 512
+; RV64-NEXT: addi t6, a1, 640
+; RV64-NEXT: vslidedown.vi v0, v3, 2
+; RV64-NEXT: addi t1, t1, -1
+; RV64-NEXT: addi t2, a1, 384
+; RV64-NEXT: vslidedown.vi v2, v5, 2
+; RV64-NEXT: li a3, 32
+; RV64-NEXT: addi t4, a6, -32
+; RV64-NEXT: sltu a6, a6, t4
+; RV64-NEXT: addi a6, a6, -1
+; RV64-NEXT: and a6, a6, t4
+; RV64-NEXT: addi t4, a6, -16
+; RV64-NEXT: sltu t5, a6, t4
+; RV64-NEXT: addi t5, t5, -1
+; RV64-NEXT: bltu a6, a2, .LBB16_6
+; RV64-NEXT: # %bb.5:
+; RV64-NEXT: li a6, 16
+; RV64-NEXT: .LBB16_6:
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v8, (s0)
+; RV64-NEXT: csrr s0, vlenb
+; RV64-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: li a0, 56
+; RV64-NEXT: mul s0, s0, a0
+; RV64-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: add s0, sp, s0
+; RV64-NEXT: addi s0, s0, 32
+; RV64-NEXT: vs8r.v v8, (s0) # Unknown-size Folded Spill
+; RV64-NEXT: vle64.v v16, (t6)
+; RV64-NEXT: vle64.v v8, (t3)
+; RV64-NEXT: csrr t3, vlenb
+; RV64-NEXT: slli t3, t3, 3
+; RV64-NEXT: add t3, sp, t3
+; RV64-NEXT: addi t3, t3, 32
+; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill
+; RV64-NEXT: vle64.v v8, (a1)
+; RV64-NEXT: csrr t3, vlenb
+; RV64-NEXT: li t6, 48
+; RV64-NEXT: mul t3, t3, t6
+; RV64-NEXT: add t3, sp, t3
+; RV64-NEXT: addi t3, t3, 32
+; RV64-NEXT: vs8r.v v8, (t3) # Unknown-size Folded Spill
+; RV64-NEXT: vle64.v v8, (t2)
+; RV64-NEXT: csrr t2, vlenb
+; RV64-NEXT: slli t2, t2, 4
+; RV64-NEXT: add t2, sp, t2
+; RV64-NEXT: addi t2, t2, 32
+; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill
+; RV64-NEXT: and t2, t1, t0
+; RV64-NEXT: and t1, t5, t4
+; RV64-NEXT: addi a1, a1, 256
+; RV64-NEXT: mv t0, a4
+; RV64-NEXT: bltu a4, a3, .LBB16_8
+; RV64-NEXT: # %bb.7:
+; RV64-NEXT: li t0, 32
+; RV64-NEXT: .LBB16_8:
+; RV64-NEXT: vsetvli zero, t2, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; RV64-NEXT: addi t2, sp, 32
+; RV64-NEXT: vs8r.v v8, (t2) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v3
+; RV64-NEXT: csrr t2, vlenb
+; RV64-NEXT: li t3, 56
+; RV64-NEXT: mul t2, t2, t3
+; RV64-NEXT: add t2, sp, t2
+; RV64-NEXT: addi t2, t2, 32
+; RV64-NEXT: vl8r.v v24, (t2) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: slli a5, a5, 6
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v6
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: slli a5, a5, 3
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, t1, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: li t1, 24
+; RV64-NEXT: mul a5, a5, t1
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV64-NEXT: addi a5, t0, -16
+; RV64-NEXT: sltu t0, t0, a5
+; RV64-NEXT: addi t0, t0, -1
+; RV64-NEXT: and a5, t0, a5
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vle64.v v16, (a1)
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vi v6, v7, 2
+; RV64-NEXT: vmv1r.v v0, v4
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li t0, 48
+; RV64-NEXT: mul a1, a1, t0
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a6, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a6, 56
+; RV64-NEXT: mul a1, a1, a6
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vmv1r.v v0, v2
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a5, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v24, v16, 0, v0.t
+; RV64-NEXT: bltu a4, a2, .LBB16_10
+; RV64-NEXT: # %bb.9:
+; RV64-NEXT: li a4, 16
+; RV64-NEXT: .LBB16_10:
+; RV64-NEXT: vmv1r.v v0, v5
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a4, 48
+; RV64-NEXT: mul a1, a1, a4
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: mv a1, a7
+; RV64-NEXT: bltu a7, a3, .LBB16_12
+; RV64-NEXT: # %bb.11:
+; RV64-NEXT: li a1, 32
+; RV64-NEXT: .LBB16_12:
+; RV64-NEXT: vmv1r.v v0, v6
+; RV64-NEXT: addi a4, sp, 32
+; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a4, vlenb
+; RV64-NEXT: li a5, 24
+; RV64-NEXT: mul a4, a4, a5
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: addi a4, a4, 32
+; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a4, vlenb
+; RV64-NEXT: li a5, 24
+; RV64-NEXT: mul a4, a4, a5
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: addi a4, a4, 32
+; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a4, vlenb
+; RV64-NEXT: slli a4, a4, 6
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: addi a4, a4, 32
+; RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vslideup.vi v24, v8, 16
+; RV64-NEXT: csrr a4, vlenb
+; RV64-NEXT: slli a4, a4, 6
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: addi a4, a4, 32
+; RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV64-NEXT: addi a4, a1, -16
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: li a6, 56
+; RV64-NEXT: mul a5, a5, a6
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; RV64-NEXT: vslideup.vi v8, v16, 16
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: li a6, 56
+; RV64-NEXT: mul a5, a5, a6
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: li a6, 48
+; RV64-NEXT: mul a5, a5, a6
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: li a6, 24
+; RV64-NEXT: mul a5, a5, a6
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
+; RV64-NEXT: vslideup.vi v8, v16, 16
+; RV64-NEXT: csrr a5, vlenb
+; RV64-NEXT: li a6, 48
+; RV64-NEXT: mul a5, a5, a6
+; RV64-NEXT: add a5, sp, a5
+; RV64-NEXT: addi a5, a5, 32
+; RV64-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
+; RV64-NEXT: sltu a1, a1, a4
+; RV64-NEXT: addi a1, a1, -1
+; RV64-NEXT: and a1, a1, a4
+; RV64-NEXT: csrr a4, vlenb
+; RV64-NEXT: slli a4, a4, 5
+; RV64-NEXT: add a4, sp, a4
+; RV64-NEXT: addi a4, a4, 32
+; RV64-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v8, v16, 0, v0.t
+; RV64-NEXT: bltu a7, a2, .LBB16_14
+; RV64-NEXT: # %bb.13:
+; RV64-NEXT: li a7, 16
+; RV64-NEXT: .LBB16_14:
+; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 40
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vsetvli zero, a7, e32, m4, ta, ma
+; RV64-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; RV64-NEXT: vslideup.vi v16, v8, 16
+; RV64-NEXT: vse32.v v16, (a0)
+; RV64-NEXT: addi a1, a0, 256
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 48
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 32
+; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vse32.v v8, (a1)
+; RV64-NEXT: addi a1, a0, 128
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 56
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 32
+; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vse32.v v8, (a1)
+; RV64-NEXT: addi a0, a0, 384
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 6
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 32
+; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 72
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: .cfi_def_cfa sp, 48
+; RV64-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT: .cfi_restore s0
+; RV64-NEXT: addi sp, sp, 48
+; RV64-NEXT: .cfi_def_cfa_offset 0
+; RV64-NEXT: ret
%v = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> %a, <128 x i1> %m, i32 %vl)
ret <128 x i32> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
index 6c9989775f790..8e2e8f3fb0dec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -402,29 +402,29 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: .LBB32_2:
-; CHECK-NEXT: addi a4, a3, -16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v0, v8, 2
-; CHECK-NEXT: sltu a3, a3, a4
-; CHECK-NEXT: addi a3, a3, -1
-; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: addi a5, a3, -16
; CHECK-NEXT: addi a4, a1, 128
-; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a4), v0.t
-; CHECK-NEXT: addi a3, a2, -32
-; CHECK-NEXT: sltu a4, a2, a3
-; CHECK-NEXT: addi a4, a4, -1
-; CHECK-NEXT: and a4, a4, a3
+; CHECK-NEXT: addi a7, a2, -32
+; CHECK-NEXT: sltu a3, a3, a5
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a6, a3, a5
+; CHECK-NEXT: sltu a3, a2, a7
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a5, a3, a7
; CHECK-NEXT: li a3, 16
-; CHECK-NEXT: bltu a4, a3, .LBB32_4
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v8, 2
+; CHECK-NEXT: bltu a5, a3, .LBB32_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: li a4, 16
+; CHECK-NEXT: li a5, 16
; CHECK-NEXT: .LBB32_4:
+; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a4), v0.t
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v8, 4
-; CHECK-NEXT: addi a5, a1, 256
-; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a5), v0.t
+; CHECK-NEXT: addi a4, a1, 256
+; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v24, (a4), v0.t
; CHECK-NEXT: bltu a2, a3, .LBB32_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: li a2, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 557882ee31d4c..984bc5b2c7352 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -26,9 +26,10 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vslide1down.vx v10, v10, a4
; RV32-NEXT: vslide1down.vx v10, v10, a2
; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV32-NEXT: vsetvli zero, zero, e32, m2, tu, mu
; RV32-NEXT: vle32.v v8, (a0), v0.t
; RV32-NEXT: vse32.v v8, (a3)
; RV32-NEXT: ret
@@ -56,9 +57,10 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vslide1down.vx v10, v10, a4
; RV64-NEXT: vslide1down.vx v10, v10, a2
; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu
; RV64-NEXT: vle32.v v8, (a0), v0.t
; RV64-NEXT: vse32.v v8, (a3)
; RV64-NEXT: ret
@@ -94,9 +96,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vslide1down.vx v10, v10, a4
; RV32-NEXT: vslide1down.vx v10, v10, a2
; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
; RV32-NEXT: vse32.v v8, (a3)
; RV32-NEXT: ret
@@ -124,9 +127,10 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vslide1down.vx v10, v10, a4
; RV64-NEXT: vslide1down.vx v10, v10, a2
; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64-NEXT: vse32.v v8, (a3)
; RV64-NEXT: ret
@@ -163,9 +167,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vslide1down.vx v10, v10, a3
; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: ret
@@ -193,9 +198,10 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vslide1down.vx v10, v10, a3
; RV64-NEXT: vslide1down.vx v10, v10, a1
; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: ret
@@ -231,9 +237,10 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vslide1down.vx v10, v10, a4
; RV32-NEXT: vslide1down.vx v10, v10, a2
; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV32-NEXT: vsetvli zero, zero, e32, m2, tu, mu
; RV32-NEXT: vle32.v v8, (a0), v0.t
; RV32-NEXT: vse32.v v8, (a3)
; RV32-NEXT: ret
@@ -261,9 +268,10 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vslide1down.vx v10, v10, a4
; RV64-NEXT: vslide1down.vx v10, v10, a2
; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, mu
; RV64-NEXT: vle32.v v8, (a0), v0.t
; RV64-NEXT: vse32.v v8, (a3)
; RV64-NEXT: ret
@@ -299,9 +307,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vslide1down.vx v10, v10, a3
; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: ret
@@ -329,9 +338,10 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vslide1down.vx v10, v10, a3
; RV64-NEXT: vslide1down.vx v10, v10, a1
; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: ret
@@ -368,9 +378,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vslide1down.vx v10, v10, a3
; RV32-NEXT: vslide1down.vx v10, v10, a1
; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV32-NEXT: vand.vi v10, v10, 1
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, 0, v0
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: ret
@@ -398,9 +409,10 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vslide1down.vx v10, v10, a3
; RV64-NEXT: vslide1down.vx v10, v10, a1
; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
; RV64-NEXT: vand.vi v10, v10, 1
; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmerge.vim v8, v8, 0, v0
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
index f9b5095c9af1d..9b5bde2814fda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -1515,40 +1515,36 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a2, 2
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
index d56e46f7db3ab..9d18cd33889c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
@@ -1648,14 +1648,14 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
+; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
index 81e4a548f560e..87a08f18f9dea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
@@ -1648,14 +1648,14 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
+; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v24, v8, v0.t
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
index b569efc7447da..cb7961cb9bd8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
@@ -1186,21 +1186,21 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a5, a3, 3
-; CHECK-NEXT: srli a1, a3, 3
-; CHECK-NEXT: sub a6, a4, a3
-; CHECK-NEXT: vslidedown.vx v0, v0, a1
-; CHECK-NEXT: add a1, a2, a5
-; CHECK-NEXT: vl8re64.v v8, (a1)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: sltu a1, a4, a6
-; CHECK-NEXT: addi a1, a1, -1
-; CHECK-NEXT: and a6, a1, a6
-; CHECK-NEXT: li a1, 63
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a5, a1, 3
+; CHECK-NEXT: srli a3, a1, 3
+; CHECK-NEXT: sub a6, a4, a1
+; CHECK-NEXT: vslidedown.vx v0, v0, a3
+; CHECK-NEXT: add a3, a2, a5
+; CHECK-NEXT: vl8re64.v v8, (a3)
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: sltu a3, a4, a6
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a6, a3, a6
+; CHECK-NEXT: li a3, 63
; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
; CHECK-NEXT: csrr a6, vlenb
; CHECK-NEXT: slli a6, a6, 4
; CHECK-NEXT: add a6, sp, a6
@@ -1227,7 +1227,7 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vl8re64.v v16, (a5)
-; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
; CHECK-NEXT: addi a5, sp, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v8, (a0)
@@ -1257,9 +1257,9 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: bltu a4, a3, .LBB47_2
+; CHECK-NEXT: bltu a4, a1, .LBB47_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a4, a1
; CHECK-NEXT: .LBB47_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
@@ -1268,12 +1268,12 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 24
-; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1281,8 +1281,8 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 24
-; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -1292,7 +1292,7 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vnot.v v8, v8, v0.t
-; CHECK-NEXT: vand.vx v16, v8, a1, v0.t
+; CHECK-NEXT: vand.vx v16, v8, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
index 937b3e6636df8..a325829d472db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
@@ -1498,59 +1498,66 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va,
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: lui a2, %hi(.LCPI44_0)
; CHECK-NEXT: srli a3, a1, 3
; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2)
; CHECK-NEXT: sub a2, a0, a1
-; CHECK-NEXT: vslidedown.vx v6, v0, a3
+; CHECK-NEXT: vslidedown.vx v25, v0, a3
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: vfabs.v v8, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t
; CHECK-NEXT: frflags a2
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
; CHECK-NEXT: fsflags a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB44_2:
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t
; CHECK-NEXT: frflags a0
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: fsflags a0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
index 39744dcecd718..bc4b3ad7f79f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -1515,40 +1515,36 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a2, 4
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
index df5844277c997..ab26be9d2ce08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -1515,40 +1515,36 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va,
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a2, 0
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index 1300d8cd64ebb..75615fe0fe759 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -1515,40 +1515,36 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16, v0.t
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a2, 1
; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
-; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfabs.v v16, v8, v0.t
+; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t
+; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
; CHECK-NEXT: fsrmi a0, 1
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
+; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
-; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index e6272701a6033..91442e2b75682 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1398,8 +1398,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi t0, t0, 16
; CHECK-NEXT: vs1r.v v24, (t0) # Unknown-size Folded Spill
; CHECK-NEXT: vslidedown.vx v25, v24, a1
-; CHECK-NEXT: vsetvli t0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v24, v25, a3
; CHECK-NEXT: vl8re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli t0, a0, 5
@@ -1407,6 +1405,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vx v24, v25, a3
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli t0, a0, 3
; CHECK-NEXT: add a0, t0, a0
@@ -1425,15 +1425,15 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: bltu a6, a4, .LBB85_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a6, a4
+; CHECK-NEXT: .LBB85_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t
-; CHECK-NEXT: bltu a6, a4, .LBB85_2
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a6, a4
-; CHECK-NEXT: .LBB85_2:
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a7, a0, 3
@@ -3598,8 +3598,6 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi t0, t0, 16
; ZVFHMIN-NEXT: vs1r.v v24, (t0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vslidedown.vx v25, v24, a1
-; ZVFHMIN-NEXT: vsetvli t0, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v24, v25, a3
; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli t0, a0, 5
@@ -3607,6 +3605,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslidedown.vx v24, v25, a3
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli t0, a0, 3
; ZVFHMIN-NEXT: add a0, t0, a0
@@ -3625,15 +3625,15 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: bltu a6, a4, .LBB171_2
+; ZVFHMIN-NEXT: # %bb.1:
+; ZVFHMIN-NEXT: mv a6, a4
+; ZVFHMIN-NEXT: .LBB171_2:
; ZVFHMIN-NEXT: vmv1r.v v0, v24
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v8, v0.t
-; ZVFHMIN-NEXT: bltu a6, a4, .LBB171_2
-; ZVFHMIN-NEXT: # %bb.1:
-; ZVFHMIN-NEXT: mv a6, a4
-; ZVFHMIN-NEXT: .LBB171_2:
; ZVFHMIN-NEXT: vmv1r.v v0, v25
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a7, a0, 3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll b/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll
index 10929394af75f..16074250a8351 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-extract-last-active.ll
@@ -5,13 +5,14 @@
define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) {
; CHECK-LABEL: extract_last_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmsne.vi v0, v9, 0
-; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vcpop.m a1, v0
-; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: beqz a1, .LBB0_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: vredmaxu.vs v9, v9, v9
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: andi a0, a0, 255
@@ -29,12 +30,12 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmsne.vi v0, v9, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vcpop.m a1, v0
-; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: beqz a1, .LBB1_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: vredmaxu.vs v9, v9, v9
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: andi a0, a0, 255
@@ -53,12 +54,12 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmsne.vi v0, v9, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vcpop.m a1, v0
-; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: beqz a1, .LBB2_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: vredmaxu.vs v9, v9, v9
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: andi a0, a0, 255
@@ -77,14 +78,14 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmsne.vi v0, v9, 0
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: vcpop.m a2, v0
-; RV32-NEXT: vid.v v9, v0.t
; RV32-NEXT: beqz a2, .LBB3_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: vredmaxu.vs v9, v9, v9
+; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; RV32-NEXT: vmv.v.i v9, 0
; RV32-NEXT: li a1, 32
+; RV32-NEXT: vid.v v9, v0.t
+; RV32-NEXT: vredmaxu.vs v9, v9, v9
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: andi a0, a0, 255
; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma
@@ -100,12 +101,12 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmsne.vi v0, v9, 0
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; RV64-NEXT: vmv.v.i v9, 0
; RV64-NEXT: vcpop.m a1, v0
-; RV64-NEXT: vid.v v9, v0.t
; RV64-NEXT: beqz a1, .LBB3_2
; RV64-NEXT: # %bb.1:
+; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; RV64-NEXT: vmv.v.i v9, 0
+; RV64-NEXT: vid.v v9, v0.t
; RV64-NEXT: vredmaxu.vs v9, v9, v9
; RV64-NEXT: vmv.x.s a0, v9
; RV64-NEXT: andi a0, a0, 255
@@ -124,12 +125,12 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmsne.vi v0, v9, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vcpop.m a0, v0
-; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: beqz a0, .LBB4_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: vredmaxu.vs v9, v9, v9
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: andi a0, a0, 255
@@ -148,12 +149,12 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vmsne.vi v0, v9, 0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vcpop.m a0, v0
-; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: beqz a0, .LBB5_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vid.v v9, v0.t
; CHECK-NEXT: vredmaxu.vs v9, v9, v9
; CHECK-NEXT: vmv.x.s a0, v9
; CHECK-NEXT: andi a0, a0, 255
@@ -170,12 +171,13 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %
define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru) {
; CHECK-LABEL: extract_last_i8_scalable:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vcpop.m a1, v0
-; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: beqz a1, .LBB6_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu
+; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: vredmaxu.vs v10, v10, v10
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: andi a0, a0, 255
@@ -191,12 +193,13 @@ define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1>
define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru) {
; CHECK-LABEL: extract_last_i16_scalable:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vcpop.m a1, v0
-; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: beqz a1, .LBB7_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: vredmaxu.vs v10, v10, v10
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: andi a0, a0, 255
@@ -212,12 +215,13 @@ define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1
define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru) {
; CHECK-LABEL: extract_last_i32_scalable:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vcpop.m a1, v0
-; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: beqz a1, .LBB8_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: vredmaxu.vs v10, v10, v10
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: andi a0, a0, 255
@@ -233,14 +237,15 @@ define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1
define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru) {
; RV32-LABEL: extract_last_i64_scalable:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
-; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vcpop.m a2, v0
-; RV32-NEXT: vid.v v10, v0.t
; RV32-NEXT: beqz a2, .LBB9_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: vredmaxu.vs v10, v10, v10
+; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; RV32-NEXT: vid.v v10, v0.t
+; RV32-NEXT: vredmaxu.vs v10, v10, v10
; RV32-NEXT: vmv.x.s a0, v10
; RV32-NEXT: andi a0, a0, 255
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
@@ -254,12 +259,13 @@ define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1
;
; RV64-LABEL: extract_last_i64_scalable:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
-; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT: vcpop.m a1, v0
-; RV64-NEXT: vid.v v10, v0.t
; RV64-NEXT: beqz a1, .LBB9_2
; RV64-NEXT: # %bb.1:
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; RV64-NEXT: vid.v v10, v0.t
; RV64-NEXT: vredmaxu.vs v10, v10, v10
; RV64-NEXT: vmv.x.s a0, v10
; RV64-NEXT: andi a0, a0, 255
@@ -275,12 +281,13 @@ define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1
define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) {
; CHECK-LABEL: extract_last_float_scalable:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
-; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: beqz a0, .LBB10_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: vredmaxu.vs v10, v10, v10
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: andi a0, a0, 255
@@ -296,12 +303,13 @@ define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x
define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) {
; CHECK-LABEL: extract_last_double_scalable:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
-; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: beqz a0, .LBB11_2
; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vid.v v10, v0.t
; CHECK-NEXT: vredmaxu.vs v10, v10, v10
; CHECK-NEXT: vmv.x.s a0, v10
; CHECK-NEXT: andi a0, a0, 255
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll
index 6435c1c14e061..fd1dbab2362a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-reassociations.ll
@@ -222,9 +222,9 @@ define <vscale x 1 x i8> @vadd_vv_mask_negative(<vscale x 1 x i8> %0, <vscale x
; CHECK-NEXT: vmv1r.v v11, v8
; CHECK-NEXT: vadd.vv v11, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v9, v8
-; CHECK-NEXT: vadd.vv v9, v8, v11, v0.t
+; CHECK-NEXT: vadd.vv v9, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vadd.vv v8, v9, v11, v0.t
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vadd.mask.nxv1i8.nxv1i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
index 9e78bbdc4f441..6831d1fb63cae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
@@ -429,16 +429,16 @@ define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
@@ -1242,16 +1242,16 @@ define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
index 532629ef7a8a8..06ddade805e3a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
@@ -391,16 +391,16 @@ define <vscale x 32 x bfloat> @vfdiv_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
@@ -1154,16 +1154,16 @@ define <vscale x 32 x half> @vfdiv_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index 5ee5d40d8313d..190c91fa8c717 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -8503,10 +8503,9 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a2, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmv8r.v v24, v8
-; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
+; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
; ZVFHMIN-NEXT: lui a2, 8
+; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT: vmset.m v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a0, a3, 1
@@ -8516,25 +8515,25 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a3
; ZVFHMIN-NEXT: sltu a3, a1, a4
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v16, v8, a2
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a3, a3, a4
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
-; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: addi a2, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv4r.v v16, v8
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -8543,35 +8542,32 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 3
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
; ZVFHMIN-NEXT: .LBB281_2:
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: mv a1, a0
@@ -8579,15 +8575,12 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v24
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
@@ -10079,36 +10072,34 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a3, a3, a4
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
-; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: add a4, sp, a4
-; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: addi a4, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: mv a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT: vmv4r.v v16, v8
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT: addi a2, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
@@ -10119,34 +10110,31 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %
; ZVFHMIN-NEXT: mv a0, a1
; ZVFHMIN-NEXT: .LBB292_2:
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v0, v24, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
@@ -10307,6 +10295,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv1r.v v7, v0
+; ZVFHMIN-NEXT: vmv8r.v v24, v16
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -10315,11 +10304,11 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: lui a2, 8
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: vmv.v.x v24, a1
+; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: slli a1, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
-; ZVFHMIN-NEXT: vxor.vx v8, v24, a2, v0.t
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t
+; ZVFHMIN-NEXT: vxor.vx v24, v24, a2, v0.t
; ZVFHMIN-NEXT: sub a2, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3
@@ -10330,14 +10319,15 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv4r.v v16, v8
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -10345,7 +10335,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 5
@@ -10481,7 +10471,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: vmv.v.x v16, a1
; ZVFHMIN-NEXT: slli a1, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
-; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v16, a2, v0.t
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: mv a5, a4
@@ -10489,8 +10479,8 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a2, v0.t
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a2, v0.t
; ZVFHMIN-NEXT: sub a2, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a3
@@ -10501,9 +10491,9 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
@@ -10516,9 +10506,8 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
-; ZVFHMIN-NEXT: vmv8r.v v16, v8
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 5
; ZVFHMIN-NEXT: add a2, sp, a2
@@ -12433,35 +12422,35 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: lui a2, 8
; ZVFHMIN-NEXT: vsetvli a3, zero, e8, m4, ta, ma
-; ZVFHMIN-NEXT: vmset.m v8
+; ZVFHMIN-NEXT: vmset.m v24
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v24, a1
+; ZVFHMIN-NEXT: vmv.v.x v8, a1
; ZVFHMIN-NEXT: slli a1, a3, 1
; ZVFHMIN-NEXT: srli a3, a3, 2
-; ZVFHMIN-NEXT: vxor.vx v24, v24, a2
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a2
; ZVFHMIN-NEXT: sub a2, a0, a1
; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a3
; ZVFHMIN-NEXT: sltu a3, a0, a2
; ZVFHMIN-NEXT: addi a3, a3, -1
; ZVFHMIN-NEXT: and a2, a3, a2
-; ZVFHMIN-NEXT: vmv4r.v v8, v24
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: mv a3, a2
@@ -12469,14 +12458,14 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
+; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t
+; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8, v0.t
; ZVFHMIN-NEXT: bltu a0, a1, .LBB309_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a0, a1
@@ -12485,9 +12474,9 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
@@ -12503,14 +12492,15 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute(<vsc
; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v0
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
index 4523b43274eff..cb2f642b38f32 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll
@@ -201,16 +201,16 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
@@ -570,16 +570,16 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
index a621dc282beb3..42fe4521b65f1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll
@@ -201,16 +201,16 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
@@ -570,16 +570,16 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
index c1617cd365216..2742ce7efd188 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
@@ -513,16 +513,16 @@ define <vscale x 32 x half> @vfmul_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
index 63156e1399293..9d5005f9c5ed0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
@@ -96,16 +96,8 @@ declare <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x
define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: sub a3, a0, a1
@@ -113,24 +105,16 @@ define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double>
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
-; CHECK-NEXT: vmv8r.v v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 %vl)
ret <vscale x 16 x float> %v
@@ -144,58 +128,68 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 3
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: slli a6, a1, 3
-; CHECK-NEXT: slli a4, a1, 1
-; CHECK-NEXT: vslidedown.vx v16, v0, a5
-; CHECK-NEXT: add a6, a0, a6
-; CHECK-NEXT: sub a5, a2, a4
-; CHECK-NEXT: vl8re64.v v24, (a6)
-; CHECK-NEXT: sltu a6, a2, a5
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a5, a6, a5
-; CHECK-NEXT: sub a6, a5, a1
-; CHECK-NEXT: sltu a7, a5, a6
-; CHECK-NEXT: addi a7, a7, -1
; CHECK-NEXT: vl8re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v16, a3
-; CHECK-NEXT: and a0, a7, a6
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
-; CHECK-NEXT: bltu a5, a1, .LBB8_2
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: srli a5, a1, 3
+; CHECK-NEXT: slli a4, a1, 3
+; CHECK-NEXT: slli a3, a1, 1
+; CHECK-NEXT: add a6, a0, a4
+; CHECK-NEXT: sub a0, a2, a3
+; CHECK-NEXT: sltu a4, a2, a0
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a0, a4, a0
+; CHECK-NEXT: sub a4, a0, a1
+; CHECK-NEXT: sltu a7, a0, a4
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a4, a7, a4
+; CHECK-NEXT: srli a7, a1, 2
+; CHECK-NEXT: vl8re64.v v8, (a6)
+; CHECK-NEXT: vslidedown.vx v16, v0, a7
+; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v25, v0, a5
+; CHECK-NEXT: vslidedown.vx v0, v16, a5
+; CHECK-NEXT: bltu a0, a1, .LBB8_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a5, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB8_2:
+; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v6, v7, a3
-; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
-; CHECK-NEXT: bltu a2, a4, .LBB8_4
+; CHECK-NEXT: bltu a2, a3, .LBB8_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB8_4:
; CHECK-NEXT: sub a0, a2, a1
; CHECK-NEXT: sltu a3, a2, a0
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a0, a3, a0
-; CHECK-NEXT: vmv1r.v v0, v6
-; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t
@@ -203,9 +197,9 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB8_6:
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -213,7 +207,8 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
index 059408a1c9c3f..056c7557440e0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
@@ -391,16 +391,16 @@ define <vscale x 32 x bfloat> @vfsub_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
; CHECK-NEXT: sltu a2, a0, a3
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20, v0.t
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
@@ -1154,16 +1154,16 @@ define <vscale x 32 x half> @vfsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: sltu a2, a0, a3
; ZVFHMIN-NEXT: addi a2, a2, -1
; ZVFHMIN-NEXT: and a2, a2, a3
-; ZVFHMIN-NEXT: vmv4r.v v8, v16
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index e481891dfd52f..00d27dd583c8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -331,8 +331,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v8, v0
+; RV32-NEXT: slli a2, a1, 1
; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: li a2, -1
+; RV32-NEXT: li a1, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.i v10, 0
; RV32-NEXT: csrr a3, vlenb
@@ -340,21 +341,19 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
; RV32-NEXT: vmerge.vim v11, v9, 1, v0
; RV32-NEXT: srli a3, a3, 2
; RV32-NEXT: vwaddu.vv v12, v11, v11
-; RV32-NEXT: vwmaccu.vx v12, a2, v11
+; RV32-NEXT: vwmaccu.vx v12, a1, v11
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vx v11, v12, a3
; RV32-NEXT: vmerge.vim v10, v10, 1, v0
-; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT: vmsne.vi v0, v11, 0
-; RV32-NEXT: add a2, a3, a3
+; RV32-NEXT: add a1, a3, a3
; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vx v10, v9, a3
-; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: slli a2, a1, 1
-; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma
@@ -676,6 +675,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1>
; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv1r.v v9, v0
; RV32-NEXT: vmv1r.v v0, v8
+; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: li a2, -1
; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
@@ -698,10 +698,8 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(<vscale x 2 x i1>
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vx v10, v8, a3
-; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: slli a1, a1, 1
-; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT: vle32.v v10, (a0), v0.t
; RV32-NEXT: li a0, 32
; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 1007d1ce649cc..68e7297605be2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -304,12 +304,12 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8
; RV64-NEXT: vslidedown.vx v0, v13, a4
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v11
-; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT: bltu a5, a2, .LBB12_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a5, a2
; RV64-NEXT: .LBB12_2:
+; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; RV64-NEXT: vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma
; RV64-NEXT: vsext.vf8 v16, v10
; RV64-NEXT: vmv1r.v v0, v13
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index 0844180e49612..edfa4a7560949 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -572,23 +572,23 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: sub a6, a4, a3
; CHECK-NEXT: slli a7, a3, 3
-; CHECK-NEXT: srli t0, a3, 3
-; CHECK-NEXT: sub a5, a2, a5
-; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, t0
; CHECK-NEXT: sltu t0, a4, a6
-; CHECK-NEXT: add a7, a0, a7
; CHECK-NEXT: addi t0, t0, -1
; CHECK-NEXT: and a6, t0, a6
-; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v16, (a7), v0.t
-; CHECK-NEXT: sltu a2, a2, a5
+; CHECK-NEXT: srli t0, a3, 3
+; CHECK-NEXT: sub t1, a2, a5
+; CHECK-NEXT: add a5, a0, a7
+; CHECK-NEXT: sltu a2, a2, t1
; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a2, a2, a5
+; CHECK-NEXT: and a2, a2, t1
+; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v0, v8, t0
; CHECK-NEXT: bltu a2, a3, .LBB45_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB45_4:
+; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a5), v0.t
; CHECK-NEXT: slli a5, a3, 4
; CHECK-NEXT: srli a6, a3, 2
; CHECK-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index ccea5b05fb03c..b6cf9e1a9cb97 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -347,21 +347,22 @@ define double @vpreduce_ord_fadd_nxv4f64(double %s, <vscale x 4 x double> %v, <v
define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fminimum_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t
-; CHECK-NEXT: vcpop.m a0, v11, v0.t
+; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: vcpop.m a2, v10, v0.t
; CHECK-NEXT: xori a1, a1, 1
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB22_2
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: beqz a1, .LBB22_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmin.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fminimum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
@@ -371,21 +372,22 @@ define float @vreduce_fminimum_nxv4f32(float %start, <vscale x 4 x float> %val,
define float @vreduce_fmaximum_nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fmaximum_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t
-; CHECK-NEXT: vmfne.vv v11, v8, v8, v0.t
-; CHECK-NEXT: vcpop.m a0, v11, v0.t
+; CHECK-NEXT: vmfne.vv v10, v8, v8, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: vcpop.m a2, v10, v0.t
; CHECK-NEXT: xori a1, a1, 1
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB23_2
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: beqz a1, .LBB23_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfredmax.vs v10, v8, v10, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v10
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fmaximum.nxv4f32(float %start, <vscale x 4 x float> %val, <vscale x 4 x i1> %m, i32 %evl)
@@ -421,21 +423,22 @@ define float @vreduce_fmaximum_nnan_nxv4f32(float %start, <vscale x 4 x float> %
define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fminimum_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t
-; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: vcpop.m a2, v9, v0.t
; CHECK-NEXT: xori a1, a1, 1
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB26_2
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: beqz a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfredmin.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fminimum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
@@ -445,21 +448,22 @@ define float @vreduce_fminimum_v4f32(float %start, <4 x float> %val, <4 x i1> %m
define float @vreduce_fmaximum_v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vreduce_fmaximum_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: feq.s a1, fa0, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t
-; CHECK-NEXT: vmfne.vv v8, v8, v8, v0.t
-; CHECK-NEXT: vcpop.m a0, v8, v0.t
+; CHECK-NEXT: vmfne.vv v9, v8, v8, v0.t
+; CHECK-NEXT: feq.s a1, fa0, fa0
+; CHECK-NEXT: vcpop.m a2, v9, v0.t
; CHECK-NEXT: xori a1, a1, 1
-; CHECK-NEXT: or a0, a0, a1
-; CHECK-NEXT: beqz a0, .LBB27_2
+; CHECK-NEXT: or a1, a2, a1
+; CHECK-NEXT: beqz a1, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lui a0, 523264
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.s.f v9, fa0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfredmax.vs v9, v8, v9, v0.t
; CHECK-NEXT: vfmv.f.s fa0, v9
; CHECK-NEXT: ret
%s = call float @llvm.vp.reduce.fmaximum.v4f32(float %start, <4 x float> %val, <4 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
index fd5bf4ebcede8..32d24778d7327 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -285,58 +285,68 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv1r.v v7, v0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a3, a1, 3
-; CHECK-NEXT: srli a5, a1, 2
-; CHECK-NEXT: slli a6, a1, 3
-; CHECK-NEXT: slli a4, a1, 1
-; CHECK-NEXT: vslidedown.vx v16, v0, a5
-; CHECK-NEXT: add a6, a0, a6
-; CHECK-NEXT: sub a5, a2, a4
-; CHECK-NEXT: vl8re64.v v24, (a6)
-; CHECK-NEXT: sltu a6, a2, a5
-; CHECK-NEXT: addi a6, a6, -1
-; CHECK-NEXT: and a5, a6, a5
-; CHECK-NEXT: sub a6, a5, a1
-; CHECK-NEXT: sltu a7, a5, a6
-; CHECK-NEXT: addi a7, a7, -1
; CHECK-NEXT: vl8re64.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v16, a3
-; CHECK-NEXT: and a0, a7, a6
-; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v20, v24, 0, v0.t
-; CHECK-NEXT: bltu a5, a1, .LBB17_2
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: srli a5, a1, 3
+; CHECK-NEXT: slli a4, a1, 3
+; CHECK-NEXT: slli a3, a1, 1
+; CHECK-NEXT: add a6, a0, a4
+; CHECK-NEXT: sub a0, a2, a3
+; CHECK-NEXT: sltu a4, a2, a0
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a0, a4, a0
+; CHECK-NEXT: sub a4, a0, a1
+; CHECK-NEXT: sltu a7, a0, a4
+; CHECK-NEXT: addi a7, a7, -1
+; CHECK-NEXT: and a4, a7, a4
+; CHECK-NEXT: srli a7, a1, 2
+; CHECK-NEXT: vl8re64.v v8, (a6)
+; CHECK-NEXT: vslidedown.vx v16, v0, a7
+; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v25, v0, a5
+; CHECK-NEXT: vslidedown.vx v0, v16, a5
+; CHECK-NEXT: bltu a0, a1, .LBB17_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a5, a1
+; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB17_2:
+; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t
; CHECK-NEXT: vmv1r.v v0, v16
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v6, v7, a3
-; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
-; CHECK-NEXT: bltu a2, a4, .LBB17_4
+; CHECK-NEXT: bltu a2, a3, .LBB17_4
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB17_4:
; CHECK-NEXT: sub a0, a2, a1
; CHECK-NEXT: sltu a3, a2, a0
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a0, a3, a0
-; CHECK-NEXT: vmv1r.v v0, v6
-; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v28, v8, 0, v0.t
@@ -344,9 +354,9 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB17_6:
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -354,7 +364,8 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t
; CHECK-NEXT: vmv8r.v v8, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16