[llvm] [RISCV] Improve cleanup phase of RISCV Insert VSETVLI pass (PR #67144)
Simeon K via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 08:47:37 PDT 2023
https://github.com/simeonkr updated https://github.com/llvm/llvm-project/pull/67144
From 9e5471a7e3c7e755dc4d91d07c93006068c757d6 Mon Sep 17 00:00:00 2001
From: Simeon Krastnikov <simeon.krastnikov at imgtec.com>
Date: Tue, 19 Sep 2023 16:14:47 +0100
Subject: [PATCH 1/7] Improve cleanup phase of RISCV Insert VSETVLI pass
Teach the pass to eliminate redundant VSETVLIs in additional scenarios,
as demonstrated by the updated tests.
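One of the newly handled patterns (a minimal illustrative sketch, lifted
from the updated pre_lmul test below) is a pair of back-to-back vsetvlis
where the VL produced by the first is consumed only by the second; the
pair can now be folded into a single vsetvli:
  Before:
    vsetvli a1, zero, e64, m1, ta, ma
    vsetvli zero, a1, e32, m1, ta, ma
  After:
    vsetvli a1, zero, e32, m1, ta, ma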
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 111 ++---
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 381 +++++++-----------
.../RISCV/rvv/vsetvli-insert-crossbb.ll | 3 +-
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 8 +-
.../CodeGen/RISCV/rvv/vsetvli-intrinsics.ll | 18 +-
5 files changed, 228 insertions(+), 293 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 5584fa8d503dbe4..138c28e4effabf6 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1426,43 +1426,6 @@ static bool isNonZeroAVL(const MachineOperand &MO) {
return 0 != MO.getImm();
}
-// Return true if we can mutate PrevMI to match MI without changing any the
-// fields which would be observed.
-static bool canMutatePriorConfig(const MachineInstr &PrevMI,
- const MachineInstr &MI,
- const DemandedFields &Used) {
- // If the VL values aren't equal, return false if either a) the former is
- // demanded, or b) we can't rewrite the former to be the later for
- // implementation reasons.
- if (!isVLPreservingConfig(MI)) {
- if (Used.VLAny)
- return false;
-
- // We don't bother to handle the equally zero case here as it's largely
- // uninteresting.
- if (Used.VLZeroness) {
- if (isVLPreservingConfig(PrevMI))
- return false;
- if (!isNonZeroAVL(MI.getOperand(1)) ||
- !isNonZeroAVL(PrevMI.getOperand(1)))
- return false;
- }
-
- // TODO: Track whether the register is defined between
- // PrevMI and MI.
- if (MI.getOperand(1).isReg() &&
- RISCV::X0 != MI.getOperand(1).getReg())
- return false;
- }
-
- if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
- return false;
-
- auto PriorVType = PrevMI.getOperand(2).getImm();
- auto VType = MI.getOperand(2).getImm();
- return areCompatibleVTYPEs(PriorVType, VType, Used);
-}
-
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
@@ -1475,36 +1438,74 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
if (!isVectorConfigInstr(MI)) {
doUnion(Used, getDemanded(MI, MRI));
+ // We can't handle the case when the source AVL
+ // register of *NextMI is defined after MI
+ if (NextMI && NextMI->getOperand(1).isReg() && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).getReg() == NextMI->getOperand(1).getReg())
+ Used.demandVL();
continue;
}
Register VRegDef = MI.getOperand(0).getReg();
if (VRegDef != RISCV::X0 &&
- !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
- Used.demandVL();
+ !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) {
+ for (MachineInstr &UserMI : MRI->use_nodbg_instructions(VRegDef)) {
+ // ignore uses by *NextMI since we will deal with them explicitly
+ if (!NextMI || !UserMI.isIdenticalTo(*NextMI)) {
+ Used.demandVL();
+ break;
+ }
+ }
+ }
if (NextMI) {
- if (!Used.usedVL() && !Used.usedVTYPE()) {
- ToDelete.push_back(&MI);
- // Leave NextMI unchanged
- continue;
- } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
- if (!isVLPreservingConfig(*NextMI)) {
- MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
- MI.getOperand(0).setIsDead(false);
- if (NextMI->getOperand(1).isImm())
- MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
- else
- MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
- MI.setDesc(NextMI->getDesc());
+ auto NextVRegDef = NextMI->getOperand(0).getReg();
+ auto &AVLOp = MI.getOperand(1), &NextAVLOp = NextMI->getOperand(1);
+ auto &VLTypeOp = MI.getOperand(2), &NextVLTypeOp = NextMI->getOperand(2);
+ if (areCompatibleVTYPEs(VLTypeOp.getImm(), NextVLTypeOp.getImm(), Used)) {
+ bool VLUsed =
+ Used.VLAny || (Used.VLZeroness &&
+ (!isNonZeroAVL(AVLOp) || !isNonZeroAVL(NextAVLOp)));
+ if (isVLPreservingConfig(*NextMI) || !VLUsed) {
+ // Set the VTYPE of MI to that of *NextMI.
+ VLTypeOp.setImm(NextVLTypeOp.getImm());
+ // Remove *NextMI and, if its result is used, replace those uses with MI's result.
+ ToDelete.push_back(NextMI);
+ if (NextVRegDef != RISCV::X0 && !MRI->use_nodbg_empty(NextVRegDef)) {
+ if (VRegDef != RISCV::X0)
+ MRI->replaceRegWith(NextVRegDef, VRegDef);
+ else {
+ MI.getOperand(0).ChangeToRegister(
+ MRI->createVirtualRegister(&RISCV::GPRRegClass), true);
+ MRI->replaceRegWith(NextVRegDef, MI.getOperand(0).getReg());
+ }
+ }
+ // Is the AVL operand of *NextMI a register (other than X0) holding the result of MI?
+ bool KeepAVL = NextAVLOp.isReg() && NextAVLOp.getReg() == VRegDef &&
+ VRegDef != RISCV::X0;
+ // Set the AVL of MI to that of *NextMI whenever it makes sense to do so.
+ if (!isVLPreservingConfig(*NextMI) && !KeepAVL) {
+ if (NextAVLOp.isImm())
+ AVLOp.ChangeToImmediate(NextAVLOp.getImm());
+ else {
+ AVLOp.ChangeToRegister(NextAVLOp.getReg(), false);
+ // Ensure that MI has not inadvertently become VL-preserving.
+ if (VRegDef == RISCV::X0 && AVLOp.getReg() == RISCV::X0)
+ MI.getOperand(0).ChangeToRegister(
+ MRI->createVirtualRegister(&RISCV::GPRRegClass), true);
+ }
+ MI.setDesc(NextMI->getDesc());
+ }
}
- MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
- ToDelete.push_back(NextMI);
- // fallthrough
}
}
NextMI = &MI;
Used = getDemanded(MI, MRI);
+ // ignore such uses since we can handle them explicitly
+ if (VRegDef == RISCV::X0) {
+ Used.VLAny = false;
+ Used.VLZeroness = false;
+ }
}
for (auto *MI : ToDelete)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 6ee0e4525f5ec72..9f597d574221f01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2530,10 +2530,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2557,9 +2556,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB35_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2577,9 +2575,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
@@ -2590,10 +2587,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10
@@ -2613,9 +2609,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
@@ -2626,9 +2621,8 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -2680,10 +2674,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB36_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2707,9 +2700,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB36_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2727,9 +2719,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
@@ -2740,10 +2731,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10
@@ -2763,9 +2753,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB36_11
@@ -2776,9 +2765,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -2834,10 +2822,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB37_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2862,9 +2849,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB37_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -2883,9 +2869,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
@@ -2897,9 +2882,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB37_7
@@ -2922,9 +2906,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB37_11
@@ -2936,9 +2919,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -2992,10 +2974,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB38_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3019,9 +3000,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB38_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3039,9 +3019,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
@@ -3052,10 +3031,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10
@@ -3075,9 +3053,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB38_11
@@ -3088,9 +3065,8 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -3143,10 +3119,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB39_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3170,9 +3145,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB39_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3190,9 +3164,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB39_6
@@ -3203,10 +3176,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10
@@ -3226,9 +3198,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB39_11
@@ -3239,9 +3210,8 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -3298,10 +3268,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3326,9 +3295,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a3
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a3
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB40_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -3347,9 +3315,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB40_6
@@ -3361,10 +3328,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB40_7
; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10
@@ -3386,9 +3352,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB40_11
@@ -3400,9 +3365,8 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -8333,10 +8297,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8360,9 +8323,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB74_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -8380,9 +8342,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
@@ -8393,10 +8354,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10
@@ -8416,9 +8376,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
@@ -8429,9 +8388,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -8483,10 +8441,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB75_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8510,9 +8467,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB75_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -8530,9 +8486,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB75_6
@@ -8543,10 +8498,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB75_7
; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10
@@ -8566,9 +8520,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB75_11
@@ -8579,9 +8532,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -8637,10 +8589,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB76_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8665,9 +8616,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB76_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -8686,9 +8636,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB76_6
@@ -8700,9 +8649,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB76_7
@@ -8725,9 +8673,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB76_11
@@ -8739,9 +8686,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -8795,10 +8741,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB77_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8822,9 +8767,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB77_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -8842,9 +8786,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB77_6
@@ -8855,10 +8798,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10
@@ -8878,9 +8820,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB77_11
@@ -8891,9 +8832,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -8946,10 +8886,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8973,9 +8912,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB78_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -8993,9 +8931,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
@@ -9006,10 +8943,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10
@@ -9029,9 +8965,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
@@ -9042,9 +8977,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -9101,10 +9035,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -9129,9 +9062,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB79_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -9150,9 +9082,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB79_6
@@ -9164,10 +9095,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB79_7
; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10
@@ -9189,9 +9119,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB79_11
@@ -9203,9 +9132,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
@@ -13481,13 +13409,13 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB107_2
; RV64ZVE32F-NEXT: .LBB107_10: # %cond.load1
-; RV64ZVE32F-NEXT: lbu a2, 5(a0)
-; RV64ZVE32F-NEXT: lbu a3, 4(a0)
-; RV64ZVE32F-NEXT: slli a2, a2, 8
-; RV64ZVE32F-NEXT: or a2, a2, a3
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 4
+; RV64ZVE32F-NEXT: lbu a3, 1(a2)
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: slli a3, a3, 8
+; RV64ZVE32F-NEXT: or a2, a3, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB107_3
@@ -13608,10 +13536,10 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB108_2
; RV64ZVE32F-NEXT: .LBB108_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB108_3
@@ -13717,10 +13645,10 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB109_2
; RV64ZVE32F-NEXT: .LBB109_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 6(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 6
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB109_3
@@ -13826,10 +13754,10 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB110_2
; RV64ZVE32F-NEXT: .LBB110_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 30(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 30
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB110_3
@@ -13935,10 +13863,10 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB111_2
; RV64ZVE32F-NEXT: .LBB111_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 30(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 30
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB111_3
@@ -14042,10 +13970,10 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB112_2
; RV64ZVE32F-NEXT: .LBB112_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB112_3
@@ -14152,10 +14080,10 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB113_2
; RV64ZVE32F-NEXT: .LBB113_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB113_3
@@ -14264,10 +14192,9 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB114_2
; RV64ZVE32F-NEXT: .LBB114_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 4(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: lh a3, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a3
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB114_3
@@ -14378,10 +14305,10 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB115_2
; RV64ZVE32F-NEXT: .LBB115_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB115_3
@@ -14489,10 +14416,10 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB116_2
; RV64ZVE32F-NEXT: .LBB116_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB116_3
@@ -14612,10 +14539,10 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB118_2
; RV64ZVE32F-NEXT: .LBB118_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 10(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 10
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB118_3
@@ -14723,10 +14650,10 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB119_2
; RV64ZVE32F-NEXT: .LBB119_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 4(a0)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: addi a2, a0, 4
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB119_3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 44a396ee29a8ab3..10aa00ce8bbe9a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -834,8 +834,7 @@ define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y
; CHECK-LABEL: pre_lmul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 4d0f640408dd2a9..1e2049e3e8a4d52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -329,9 +329,9 @@ entry:
define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test17:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.f.s fa5, v8
+; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa4, v8
; CHECK-NEXT: fadd.d fa0, fa5, fa4
@@ -468,7 +468,6 @@ entry:
define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a1, a1, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
@@ -498,7 +497,6 @@ entry:
define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
@@ -519,7 +517,6 @@ entry:
define <vscale x 1 x i64> @vleNff2(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a2
@@ -545,7 +542,6 @@ define <vscale x 2 x i32> @avl_forward5(<vscale x 2 x i32>* %addr) {
; CHECK-LABEL: avl_forward5:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll
index 11b164fbf51e830..00917d05b42e8cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-intrinsics.ll
@@ -118,12 +118,9 @@ define <vscale x 4 x i32> @redundant_vsetvli(iXLen %avl, <vscale x 4 x i32>* %pt
; Check that we remove the repeated/redundant vsetvli when followed by another
; operation
-; FIXME: We don't catch the second vsetvli because it has a use of its output.
-; We could replace it with the output of the first vsetvli.
define <vscale x 4 x i32> @repeated_vsetvli(iXLen %avl, <vscale x 4 x i32>* %ptr) nounwind {
; CHECK-LABEL: repeated_vsetvli:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, a0, e32, m2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a1)
; CHECK-NEXT: ret
@@ -156,3 +153,18 @@ define iXLen @test_vsetvli_eqvlmax_e8m8(iXLen %avl) nounwind {
%vl = call iXLen @llvm.riscv.vsetvli.iXLen(iXLen 128, iXLen 0, iXLen 3)
ret iXLen %vl
}
+
+; Check that we remove repeated/redundant vsetvlis, even when their results
+; are used by subsequent vsetvlis (but nowhere else)
+define <vscale x 4 x i32> @chained_vsetvli(iXLen %avl, <vscale x 4 x i32>* %ptr) nounwind {
+; CHECK-LABEL: chained_vsetvli:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: ret
+ %vl0 = call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %avl, iXLen 2, iXLen 1)
+ %vl1 = call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %vl0, iXLen 2, iXLen 1)
+ %vl2 = call iXLen @llvm.riscv.vsetvli.iXLen(iXLen %vl1, iXLen 2, iXLen 1)
+ %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.iXLen(<vscale x 4 x i32> undef, <vscale x 4 x i32>* %ptr, iXLen %vl1)
+ ret <vscale x 4 x i32> %x
+}
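
As a side note for reviewers, the collapse exercised by the new chained_vsetvli
test can be pictured with a small standalone sketch. This is not the in-tree
pass; the Config struct, the register numbering, and the collapseChain helper
below are invented purely for illustration. The idea it models: a later config
that takes its AVL from the previous config's result, requests the same VTYPE,
and is the only reader of that result recomputes exactly the same VL, so the
earlier config can be dropped and its original AVL forwarded.

#include <cstdio>
#include <map>
#include <vector>

struct Config {
  int ResultReg;  // VL result register
  int AvlReg;     // AVL source register
  unsigned VType; // encoded SEW/LMUL/tail/mask policy
};

// UseCounts[r] = number of reads of register r outside the config chain.
static std::vector<Config> collapseChain(const std::vector<Config> &Chain,
                                         const std::map<int, int> &UseCounts) {
  std::vector<Config> Out;
  for (const Config &C : Chain) {
    if (!Out.empty()) {
      const Config &Prev = Out.back();
      bool PrevResultDead = UseCounts.count(Prev.ResultReg) == 0 ||
                            UseCounts.at(Prev.ResultReg) == 0;
      // The previous config is redundant when its VL result only feeds this
      // config's AVL and both request the same VTYPE: this config recomputes
      // the same VL from the same original AVL (min(AVL, VLMAX) is idempotent
      // for a fixed VLMAX), so fold the two into one.
      if (C.AvlReg == Prev.ResultReg && C.VType == Prev.VType && PrevResultDead) {
        Out.back() = Config{C.ResultReg, Prev.AvlReg, C.VType};
        continue;
      }
    }
    Out.push_back(C);
  }
  return Out;
}

int main() {
  // Three identical-VTYPE configs, each consuming the previous result; only
  // the final result (register 4) is read elsewhere, so the chain collapses
  // to a single config whose AVL is the original register 1.
  std::vector<Config> Chain = {{2, 1, 0xD0u}, {3, 2, 0xD0u}, {4, 3, 0xD0u}};
  std::map<int, int> UseCounts = {{4, 1}};
  std::vector<Config> Collapsed = collapseChain(Chain, UseCounts);
  std::printf("configs before: %zu, after: %zu\n", Chain.size(),
              Collapsed.size());
  return 0;
}

Compiled as an ordinary C++ program this prints "configs before: 3, after: 1",
loosely analogous to how the three chained vsetvlis in the test above fold into
the single vsetvli in its CHECK lines.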
>From 24702ccbb8e31a2f2d371a94f33ed727f791db3f Mon Sep 17 00:00:00 2001
From: Simeon Krastnikov <simeon.krastnikov at imgtec.com>
Date: Mon, 25 Sep 2023 10:14:48 +0100
Subject: [PATCH 2/7] fixup! Improve cleanup phase of RISCV Insert VSETVLI pass
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 138c28e4effabf6..e364ba058c32666 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1438,11 +1438,19 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
if (!isVectorConfigInstr(MI)) {
doUnion(Used, getDemanded(MI, MRI));
- // We can't handle the case when the source AVL
- // register of *NextMI is defined after MI
- if (NextMI && NextMI->getOperand(1).isReg() && MI.getOperand(0).isReg() &&
- MI.getOperand(0).getReg() == NextMI->getOperand(1).getReg())
- Used.demandVL();
+
+ // We can't handle the case when the source AVL register of *NextMI is
+ // defined after MI
+ if (NextMI && NextMI->getOperand(1).isReg()) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() &&
+ MO.getReg() == NextMI->getOperand(1).getReg()) {
+ Used.demandVL();
+ break;
+ }
+ }
+ }
+
continue;
}
>From 6399776a5398d0742acb1ad664a72a46e64fc23d Mon Sep 17 00:00:00 2001
From: Simeon Krastnikov <simeon.krastnikov at imgtec.com>
Date: Fri, 29 Sep 2023 09:52:29 +0100
Subject: [PATCH 3/7] fixup! Improve cleanup phase of RISCV Insert VSETVLI pass
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 10 ++++++----
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 1 -
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index e364ba058c32666..5d52c4ffb333705 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1433,7 +1433,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
DemandedFields Used;
Used.demandVL();
Used.demandVTYPE();
- SmallVector<MachineInstr*> ToDelete;
+ SmallPtrSet<MachineInstr *, 8> ToDelete;
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (!isVectorConfigInstr(MI)) {
@@ -1458,8 +1458,10 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
if (VRegDef != RISCV::X0 &&
!(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) {
for (MachineInstr &UserMI : MRI->use_nodbg_instructions(VRegDef)) {
- // ignore uses by *NextMI since we will deal with them explicitly
- if (!NextMI || !UserMI.isIdenticalTo(*NextMI)) {
+ // do not track uses by *NextMI since we will deal with them explicitly,
+ // and also ignore uses by instructions that will be removed
+ if ((!NextMI || !UserMI.isIdenticalTo(*NextMI)) &&
+ ToDelete.find(&UserMI) == ToDelete.end()) {
Used.demandVL();
break;
}
@@ -1478,7 +1480,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
// set VLType of MI to that of *NextMI
VLTypeOp.setImm(NextVLTypeOp.getImm());
// remove *NextMI and potentially replace its uses with MI
- ToDelete.push_back(NextMI);
+ ToDelete.insert(NextMI);
if (NextVRegDef != RISCV::X0 && !MRI->use_nodbg_empty(NextVRegDef)) {
if (VRegDef != RISCV::X0)
MRI->replaceRegWith(NextVRegDef, VRegDef);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 1e2049e3e8a4d52..0a8a802a72c6d92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -329,7 +329,6 @@ entry:
define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test17:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfadd.vv v8, v8, v9
>From 566ede43b962a9ac89fa71dbf822857415e6655e Mon Sep 17 00:00:00 2001
From: Simeon Krastnikov <simeon.krastnikov at imgtec.com>
Date: Fri, 29 Sep 2023 14:24:10 +0100
Subject: [PATCH 4/7] fixup! Improve cleanup phase of RISCV Insert VSETVLI pass
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 5d52c4ffb333705..2d997df29795003 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1461,7 +1461,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
// do not track uses by *NextMI since we will deal with them explicitly,
// and also ignore uses by instructions that will be removed
if ((!NextMI || !UserMI.isIdenticalTo(*NextMI)) &&
- ToDelete.find(&UserMI) == ToDelete.end()) {
+ !ToDelete.contains(&UserMI)) {
Used.demandVL();
break;
}
>From 1c1b326a40c791be321c00ee72066ccb0b168907 Mon Sep 17 00:00:00 2001
From: Simeon Krastnikov <simeon.krastnikov at imgtec.com>
Date: Thu, 12 Oct 2023 10:09:14 +0100
Subject: [PATCH 5/7] fixup! Improve cleanup phase of RISCV Insert VSETVLI pass
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 38 ++++++++++----------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 2d997df29795003..ad87e760a7acbd1 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -28,6 +28,7 @@
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <algorithm>
#include <queue>
using namespace llvm;
@@ -1441,31 +1442,30 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
// We can't handle the case when the source AVL register of *NextMI is
// defined after MI
- if (NextMI && NextMI->getOperand(1).isReg()) {
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() &&
- MO.getReg() == NextMI->getOperand(1).getReg()) {
- Used.demandVL();
- break;
- }
- }
- }
+ if (NextMI && NextMI->getOperand(1).isReg() &&
+ std::any_of(MI.operands_begin(), MI.operands_end(),
+ [&](const MachineOperand &MO) {
+ return MO.isReg() && MO.isDef() &&
+ MO.getReg() == NextMI->getOperand(1).getReg();
+ }))
+ Used.demandVL();
continue;
}
Register VRegDef = MI.getOperand(0).getReg();
if (VRegDef != RISCV::X0 &&
- !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef))) {
- for (MachineInstr &UserMI : MRI->use_nodbg_instructions(VRegDef)) {
- // do not track uses by *NextMI since we will deal with them explicitly,
- // and also ignore uses by instructions that will be removed
- if ((!NextMI || !UserMI.isIdenticalTo(*NextMI)) &&
- !ToDelete.contains(&UserMI)) {
- Used.demandVL();
- break;
- }
- }
+ !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)) &&
+ std::any_of(MRI->use_instr_nodbg_begin(VRegDef),
+ MRI->use_instr_nodbg_end(),
+ [&](const MachineInstr &UserMI) {
+ // do not track uses by *NextMI since we will deal with
+ // them explicitly, and also ignore uses by instructions
+ // that will be removed
+ return (!NextMI || !UserMI.isIdenticalTo(*NextMI)) &&
+ !ToDelete.contains(&UserMI);
+ })) {
+ Used.demandVL();
}
if (NextMI) {
>From 067d87b5b67a8f478f06a38c8419527532ac6a1e Mon Sep 17 00:00:00 2001
From: Simeon Krastnikov <simeon.krastnikov at imgtec.com>
Date: Thu, 19 Oct 2023 16:39:36 +0100
Subject: [PATCH 6/7] fixup! Improve cleanup phase of RISCV Insert VSETVLI pass
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 9 +-
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 203 +++++++++---------
.../RISCV/rvv/vsetvli-insert-crossbb.ll | 3 +-
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 5 +
4 files changed, 110 insertions(+), 110 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index ad87e760a7acbd1..72ce00a072ad48c 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1462,8 +1462,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
// do not track uses by *NextMI since we will deal with
// them explicitly, and also ignore uses by instructions
// that will be removed
- return (!NextMI || !UserMI.isIdenticalTo(*NextMI)) &&
- !ToDelete.contains(&UserMI);
+ if (NextMI && UserMI.isIdenticalTo(*NextMI) &&
+ MI.getOperand(2).getImm() ==
+ UserMI.getOperand(2).getImm())
+ return false;
+ if (ToDelete.contains(&UserMI))
+ return false;
+ return true;
})) {
Used.demandVL();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 9f597d574221f01..c226ed2b8ca0003 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2530,9 +2530,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2575,7 +2575,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -2587,9 +2587,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10
@@ -2674,9 +2674,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB36_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2719,7 +2719,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -2731,9 +2731,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10
@@ -2822,9 +2822,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB37_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2869,7 +2869,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -2882,7 +2882,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -2974,9 +2974,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB38_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3019,7 +3019,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -3031,9 +3031,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10
@@ -3119,9 +3119,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB39_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3164,7 +3164,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -3176,9 +3176,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10
@@ -3268,9 +3268,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v9, a3
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3315,7 +3315,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
@@ -3328,9 +3328,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v12, a3
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v8, a3
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB40_7
; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10
@@ -8297,9 +8297,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8342,7 +8342,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8354,9 +8354,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10
@@ -8441,9 +8441,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB75_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8486,7 +8486,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8498,9 +8498,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB75_7
; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10
@@ -8589,9 +8589,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB76_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8636,7 +8636,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8649,7 +8649,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -8741,9 +8741,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB77_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8786,7 +8786,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8798,9 +8798,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10
@@ -8886,9 +8886,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8931,7 +8931,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8943,9 +8943,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10
@@ -9035,9 +9035,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -9082,7 +9082,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
@@ -9095,9 +9095,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB79_7
; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10
@@ -13409,11 +13409,10 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB107_2
; RV64ZVE32F-NEXT: .LBB107_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 4
-; RV64ZVE32F-NEXT: lbu a3, 1(a2)
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: slli a3, a3, 8
-; RV64ZVE32F-NEXT: or a2, a3, a2
+; RV64ZVE32F-NEXT: lbu a2, 5(a0)
+; RV64ZVE32F-NEXT: lbu a3, 4(a0)
+; RV64ZVE32F-NEXT: slli a2, a2, 8
+; RV64ZVE32F-NEXT: or a2, a2, a3
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13536,8 +13535,7 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB108_2
; RV64ZVE32F-NEXT: .LBB108_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 2(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13645,8 +13643,7 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB109_2
; RV64ZVE32F-NEXT: .LBB109_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 6
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13754,8 +13751,7 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB110_2
; RV64ZVE32F-NEXT: .LBB110_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 30
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 30(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13863,8 +13859,7 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB111_2
; RV64ZVE32F-NEXT: .LBB111_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 30
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 30(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13970,8 +13965,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB112_2
; RV64ZVE32F-NEXT: .LBB112_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 2(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14080,8 +14074,7 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB113_2
; RV64ZVE32F-NEXT: .LBB113_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 2(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14192,9 +14185,9 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB114_2
; RV64ZVE32F-NEXT: .LBB114_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a3, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 4(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a3
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB114_3
@@ -14305,8 +14298,7 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB115_2
; RV64ZVE32F-NEXT: .LBB115_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 2(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14416,8 +14408,7 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB116_2
; RV64ZVE32F-NEXT: .LBB116_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 2
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 2(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14539,8 +14530,7 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB118_2
; RV64ZVE32F-NEXT: .LBB118_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 10
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 10(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14650,8 +14640,7 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB119_2
; RV64ZVE32F-NEXT: .LBB119_10: # %cond.load1
-; RV64ZVE32F-NEXT: addi a2, a0, 4
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 4(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 10aa00ce8bbe9a0..44a396ee29a8ab3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -834,7 +834,8 @@ define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y
; CHECK-LABEL: pre_lmul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 0a8a802a72c6d92..5ddb5469c079ec5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -329,6 +329,7 @@ entry:
define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test17:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfadd.vv v8, v8, v9
@@ -467,6 +468,7 @@ entry:
define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
@@ -496,6 +498,7 @@ entry:
define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
@@ -516,6 +519,7 @@ entry:
define <vscale x 1 x i64> @vleNff2(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff2:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a2
@@ -541,6 +545,7 @@ define <vscale x 2 x i32> @avl_forward5(<vscale x 2 x i32>* %addr) {
; CHECK-LABEL: avl_forward5:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
>From 74ef2d6a198552454f0aee67525dc8811d1adba8 Mon Sep 17 00:00:00 2001
From: Simeon Krastnikov <simeon.krastnikov at imgtec.com>
Date: Thu, 19 Oct 2023 16:46:52 +0100
Subject: [PATCH 7/7] Revert "fixup! Improve cleanup phase of RISCV Insert
VSETVLI pass"
This reverts commit 067d87b5b67a8f478f06a38c8419527532ac6a1e.
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 9 +-
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 203 +++++++++---------
.../RISCV/rvv/vsetvli-insert-crossbb.ll | 3 +-
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll | 5 -
4 files changed, 110 insertions(+), 110 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 72ce00a072ad48c..ad87e760a7acbd1 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1462,13 +1462,8 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
// do not track uses by *NextMI since we will deal with
// them explicitly, and also ignore uses by instructions
// that will be removed
- if (NextMI && UserMI.isIdenticalTo(*NextMI) &&
- MI.getOperand(2).getImm() ==
- UserMI.getOperand(2).getImm())
- return false;
- if (ToDelete.contains(&UserMI))
- return false;
- return true;
+ return (!NextMI || !UserMI.isIdenticalTo(*NextMI)) &&
+ !ToDelete.contains(&UserMI);
})) {
Used.demandVL();
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index c226ed2b8ca0003..9f597d574221f01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2530,9 +2530,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2575,7 +2575,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -2587,9 +2587,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10
@@ -2674,9 +2674,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB36_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2719,7 +2719,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -2731,9 +2731,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10
@@ -2822,9 +2822,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB37_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -2869,7 +2869,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -2882,7 +2882,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -2974,9 +2974,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB38_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3019,7 +3019,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -3031,9 +3031,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10
@@ -3119,9 +3119,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB39_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3164,7 +3164,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -3176,9 +3176,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10
@@ -3268,9 +3268,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a3
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -3315,7 +3315,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
@@ -3328,9 +3328,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a3
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v12, a3
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB40_7
; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10
@@ -8297,9 +8297,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8342,7 +8342,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8354,9 +8354,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10
@@ -8441,9 +8441,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB75_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8486,7 +8486,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8498,9 +8498,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB75_7
; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10
@@ -8589,9 +8589,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB76_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8636,7 +8636,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8649,7 +8649,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -8741,9 +8741,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB77_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8786,7 +8786,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8798,9 +8798,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10
@@ -8886,9 +8886,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -8931,7 +8931,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -8943,9 +8943,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10
@@ -9035,9 +9035,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
@@ -9082,7 +9082,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
@@ -9095,9 +9095,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB79_7
; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10
@@ -13409,10 +13409,11 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB107_2
; RV64ZVE32F-NEXT: .LBB107_10: # %cond.load1
-; RV64ZVE32F-NEXT: lbu a2, 5(a0)
-; RV64ZVE32F-NEXT: lbu a3, 4(a0)
-; RV64ZVE32F-NEXT: slli a2, a2, 8
-; RV64ZVE32F-NEXT: or a2, a2, a3
+; RV64ZVE32F-NEXT: addi a2, a0, 4
+; RV64ZVE32F-NEXT: lbu a3, 1(a2)
+; RV64ZVE32F-NEXT: lbu a2, 0(a2)
+; RV64ZVE32F-NEXT: slli a3, a3, 8
+; RV64ZVE32F-NEXT: or a2, a3, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13535,7 +13536,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB108_2
; RV64ZVE32F-NEXT: .LBB108_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13643,7 +13645,8 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB109_2
; RV64ZVE32F-NEXT: .LBB109_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 6(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 6
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13751,7 +13754,8 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB110_2
; RV64ZVE32F-NEXT: .LBB110_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 30(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 30
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13859,7 +13863,8 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB111_2
; RV64ZVE32F-NEXT: .LBB111_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 30(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 30
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -13965,7 +13970,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB112_2
; RV64ZVE32F-NEXT: .LBB112_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14074,7 +14080,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB113_2
; RV64ZVE32F-NEXT: .LBB113_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14185,9 +14192,9 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB114_2
; RV64ZVE32F-NEXT: .LBB114_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 4(a0)
+; RV64ZVE32F-NEXT: lh a3, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-NEXT: vmv.s.x v9, a3
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB114_3
@@ -14298,7 +14305,8 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB115_2
; RV64ZVE32F-NEXT: .LBB115_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14408,7 +14416,8 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB116_2
; RV64ZVE32F-NEXT: .LBB116_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 2
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14530,7 +14539,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB118_2
; RV64ZVE32F-NEXT: .LBB118_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 10(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 10
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
@@ -14640,7 +14650,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB119_2
; RV64ZVE32F-NEXT: .LBB119_10: # %cond.load1
-; RV64ZVE32F-NEXT: lh a2, 4(a0)
+; RV64ZVE32F-NEXT: addi a2, a0, 4
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 44a396ee29a8ab3..10aa00ce8bbe9a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -834,8 +834,7 @@ define <vscale x 2 x i32> @pre_lmul(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y
; CHECK-LABEL: pre_lmul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 5ddb5469c079ec5..0a8a802a72c6d92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -329,7 +329,6 @@ entry:
define double @test17(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
; CHECK-LABEL: test17:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, a0, e32, mf2, ta, ma
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa5, v8
; CHECK-NEXT: vfadd.vv v8, v8, v9
@@ -468,7 +467,6 @@ entry:
define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
; CHECK-LABEL: avl_forward4:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a1, a1, e16, m1, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
@@ -498,7 +496,6 @@ entry:
define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vsetvli zero, zero, e64, m1, tu, ma
@@ -519,7 +516,6 @@ entry:
define <vscale x 1 x i64> @vleNff2(i64* %str, i64 %n, i64 %x) {
; CHECK-LABEL: vleNff2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: vadd.vx v8, v8, a2
@@ -545,7 +541,6 @@ define <vscale x 2 x i32> @avl_forward5(<vscale x 2 x i32>* %addr) {
; CHECK-LABEL: avl_forward5:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli a1, a1, e8, m4, ta, ma
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret