[llvm] [RISCV] Adjust LMUL if not used to avoid VL toggle (PR #69259)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 17 14:32:26 PDT 2023
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/69259
>From 31f369229bf6f2f3051530ec09cd5e50e9462442 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 16 Oct 2023 18:21:57 -0400
Subject: [PATCH 1/3] [RISCV] Adjust LMUL if not used to avoid VL toggle
A common pattern with vmv.s.x is that we need to set VL afterwards because the
SEW/LMUL ratio has changed, and thus VLMAX has changed:
vsetvli zero, a1, e64, m1, ta, ma
vmv.s.x v16, a0
vsetvli zero, a1, e32, m2, ta, ma
However, since LMUL and the SEW/LMUL ratio are ignored by vmv.s.x, we can avoid
a VL toggle in the second vsetvli instruction by adjusting LMUL so that the
SEW/LMUL ratio remains the same between the two instructions:
vsetvli zero, a1, e64, m4, ta, ma
vmv.s.x v16, a0
vsetvli zero, zero, e32, m2, ta, ma
Avoiding a VL toggle may be more performant on some architectures, and in some
cases allows a vsetvli to be deleted.
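To make the arithmetic concrete, here is a minimal standalone sketch of the
LMUL adjustment (plain C++, not the actual pass code; the function name and
parameters are illustrative, and LMUL is represented in eighths so fractional
LMULs stay integral):

#include <cassert>
#include <cstdio>
#include <optional>

// Returns the LMUL (in eighths: 1 == mf8, 8 == m1, 64 == m8) that keeps the
// vmv.s.x's SEW while matching the SEW/LMUL ratio of the next vsetvli, or
// nullopt if the result is not an encodable LMUL.
std::optional<unsigned> adjustedLMULInEighths(unsigned SEW, unsigned NextSEW,
                                              unsigned NextLMULInEighths) {
  unsigned NextRatio = NextSEW * 8 / NextLMULInEighths; // SEW/LMUL of NextMI
  unsigned NewLMUL = SEW * 8 / NextRatio;               // 8 * (SEW / ratio)
  if (NewLMUL < 1 || NewLMUL > 64)                      // outside mf8..m8
    return std::nullopt;
  return NewLMUL;
}

int main() {
  // The example above: vmv.s.x at e64, followed by a vsetvli for e32, m2
  // (ratio 16). The adjusted LMUL is m4, so the second vsetvli can keep VL
  // and become "vsetvli zero, zero, e32, m2, ta, ma".
  std::optional<unsigned> LMUL = adjustedLMULInEighths(
      /*SEW=*/64, /*NextSEW=*/32, /*NextLMULInEighths=*/16);
  assert(LMUL && *LMUL == 32); // 32 eighths == m4
  std::printf("new LMUL = m%u\n", *LMUL / 8);
  return 0;
}
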
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 58 +++++++++++++
.../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 10 +--
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 72 ++++++++--------
.../RISCV/rvv/fixed-vectors-reduction-fp.ll | 59 +++++++------
.../RISCV/rvv/fixed-vectors-reduction-int.ll | 84 +++++++++----------
.../RISCV/rvv/fixed-vectors-unaligned.ll | 4 +-
.../RISCV/rvv/vreductions-fp-sdnode.ll | 24 +++---
.../test/CodeGen/RISCV/rvv/vreductions-int.ll | 24 +++---
8 files changed, 193 insertions(+), 142 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 5584fa8d503dbe4..6724acab277b5a5 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1463,6 +1463,55 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
return areCompatibleVTYPEs(PriorVType, VType, Used);
}
+// If neither LMUL nor the SEW/LMUL ratio is demanded, and MI and NextMI have
+// the same AVL, then we can try to change MI's LMUL so that we can avoid
+// setting VL in NextMI, e.g.:
+//
+// vsetivli zero, 4, e32, m1, ta, ma
+// vsetivli zero, 4, e16, mf4, ta, ma
+//
+// vsetivli zero, 4, e32, mf2, ta, ma
+// vsetvli zero, zero, e16, mf4, ta, ma
+//
+// If possible, returns the new VTYPE that should be used for MI.
+static std::optional<unsigned>
+canAdjustSEWLMULRatio(const MachineInstr &MI, const MachineInstr &NextMI,
+                      const DemandedFields &Used) {
+  if (Used.LMUL || Used.SEWLMULRatio)
+    return std::nullopt;
+  if (!NextMI.getOperand(0).isDead())
+    return std::nullopt;
+  // If we end up increasing the SEW/LMUL ratio, then we will decrease VLMAX,
+  // which means we might end up changing VL in the case that AVL > VLMAX. So
+  // bail if the exact VL value is needed.
+  //
+  // TODO: We could potentially relax this when we know we're increasing VLMAX.
+  if (Used.VLAny)
+    return std::nullopt;
+
+  // Bail if NextMI is already VL-preserving (vsetvli zero, zero). Also bail
+  // if MI is, since then we can't tell whether its AVL matches NextMI's.
+  if (isVLPreservingConfig(MI) || isVLPreservingConfig(NextMI))
+    return std::nullopt;
+
+  VSETVLIInfo NextMIInfo = getInfoForVSETVLI(NextMI);
+  VSETVLIInfo MIInfo = getInfoForVSETVLI(MI);
+  if (!MIInfo.hasSameAVL(NextMIInfo))
+    return std::nullopt;
+
+  unsigned SEW = MIInfo.getSEW() * 8;
+  // Fixed point value with 3 fractional bits.
+  unsigned NewRatio = SEW / NextMIInfo.getSEWLMULRatio();
+  bool Fractional = NewRatio < 8;
+  RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
+      Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
+
+  unsigned VType = MIInfo.encodeVTYPE();
+  return RISCVVType::encodeVTYPE(NewVLMul, SEW / 8,
+                                 RISCVVType::isTailAgnostic(VType),
+                                 RISCVVType::isMaskAgnostic(VType));
+}
+
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
@@ -1484,6 +1533,15 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
Used.demandVL();
if (NextMI) {
+      if (auto NewVType = canAdjustSEWLMULRatio(MI, *NextMI, Used)) {
+        MI.getOperand(2).setImm(*NewVType);
+        // Convert NextMI to vsetvli zero, zero
+        NextMI->setDesc(TII->get(RISCV::PseudoVSETVLIX0));
+        NextMI->getOperand(0).setReg(RISCV::X0);
+        NextMI->getOperand(0).setIsDead(true);
+        NextMI->getOperand(1).ChangeToRegister(RISCV::X0, false, false, true);
+      }
+
if (!Used.usedVL() && !Used.usedVTYPE()) {
ToDelete.push_back(&MI);
// Leave NextMI unchanged
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index cbcca9d2696f4ba..4a39bbb519cc017 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -65,9 +65,8 @@ define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_31:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a0
; CHECK-NEXT: vslideup.vi v8, v16, 31
; CHECK-NEXT: ret
%b = insertelement <32 x i32> %a, i32 %y, i32 31
@@ -103,9 +102,8 @@ define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_63:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v24, a0
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v24, a0
; CHECK-NEXT: vslideup.vi v16, v24, 31
; CHECK-NEXT: ret
%b = insertelement <64 x i32> %a, i32 %y, i32 63
@@ -550,9 +548,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a2, 6
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vse64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 60b61e889315cfe..06fb0a8eba43367 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1138,11 +1138,11 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -1271,11 +1271,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -1408,12 +1408,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2043,11 +2043,11 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2175,11 +2175,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2314,12 +2314,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2451,11 +2451,11 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2584,11 +2584,11 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -2724,12 +2724,12 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
@@ -6393,11 +6393,11 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -6526,11 +6526,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -6663,12 +6663,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -7249,11 +7249,11 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB68_7
; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7385,11 +7385,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB69_7
; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7528,12 +7528,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB70_7
; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7669,11 +7669,11 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB71_7
; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7806,11 +7806,11 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB72_7
; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
@@ -7950,12 +7950,12 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB73_7
; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 4766b3727a46252..514470227af7835 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -478,9 +478,9 @@ define float @vreduce_fwadd_v8f32(ptr %x, float %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -496,9 +496,9 @@ define float @vreduce_ord_fwadd_v8f32(ptr %x, float %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -544,9 +544,9 @@ define float @vreduce_fwadd_v16f32(ptr %x, float %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -562,9 +562,9 @@ define float @vreduce_ord_fwadd_v16f32(ptr %x, float %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -613,9 +613,9 @@ define float @vreduce_fwadd_v32f32(ptr %x, float %s) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -632,9 +632,9 @@ define float @vreduce_ord_fwadd_v32f32(ptr %x, float %s) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -694,9 +694,8 @@ define float @vreduce_fwadd_v64f32(ptr %x, float %s) {
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vfredusum.vs v8, v24, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -715,9 +714,9 @@ define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) {
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vslidedown.vx v16, v8, a0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vfwredosum.vs v8, v16, v8
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -890,9 +889,9 @@ define double @vreduce_fwadd_v4f64(ptr %x, double %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -908,9 +907,9 @@ define double @vreduce_ord_fwadd_v4f64(ptr %x, double %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -956,9 +955,9 @@ define double @vreduce_fwadd_v8f64(ptr %x, double %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -974,9 +973,9 @@ define double @vreduce_ord_fwadd_v8f64(ptr %x, double %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -1022,9 +1021,9 @@ define double @vreduce_fwadd_v16f64(ptr %x, double %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -1040,9 +1039,9 @@ define double @vreduce_ord_fwadd_v16f64(ptr %x, double %s) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -1118,9 +1117,9 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT: vslidedown.vi v16, v8, 16
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v12
; CHECK-NEXT: vfwredosum.vs v8, v16, v8
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index f2a1f2752cda000..5942e9b0533e979 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -364,9 +364,9 @@ define i16 @vwreduce_add_v16i16(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -382,9 +382,9 @@ define i16 @vwreduce_uadd_v16i16(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -418,9 +418,9 @@ define i16 @vwreduce_add_v32i16(ptr %x) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -437,9 +437,9 @@ define i16 @vwreduce_uadd_v32i16(ptr %x) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -473,9 +473,9 @@ define i16 @vwreduce_add_v64i16(ptr %x) {
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -492,9 +492,9 @@ define i16 @vwreduce_uadd_v64i16(ptr %x) {
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -536,9 +536,8 @@ define i16 @vwreduce_add_v128i16(ptr %x) {
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vredsum.vs v8, v24, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -559,9 +558,8 @@ define i16 @vwreduce_uadd_v128i16(ptr %x) {
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vredsum.vs v8, v24, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -734,9 +732,9 @@ define i32 @vwreduce_add_v8i32(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -752,9 +750,9 @@ define i32 @vwreduce_uadd_v8i32(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -786,9 +784,9 @@ define i32 @vwreduce_add_v16i32(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -804,9 +802,9 @@ define i32 @vwreduce_uadd_v16i32(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -840,9 +838,9 @@ define i32 @vwreduce_add_v32i32(ptr %x) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -859,9 +857,9 @@ define i32 @vwreduce_uadd_v32i32(ptr %x) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v12, zero
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v12
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -903,9 +901,8 @@ define i32 @vwreduce_add_v64i32(ptr %x) {
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vredsum.vs v8, v24, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -926,9 +923,8 @@ define i32 @vwreduce_uadd_v64i32(ptr %x) {
; CHECK-NEXT: vslidedown.vx v16, v8, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vwaddu.vv v24, v8, v16
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vredsum.vs v8, v24, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1152,9 +1148,9 @@ define i64 @vwreduce_add_v4i64(ptr %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1185,9 +1181,9 @@ define i64 @vwreduce_uadd_v4i64(ptr %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1247,9 +1243,9 @@ define i64 @vwreduce_add_v8i64(ptr %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v10
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1280,9 +1276,9 @@ define i64 @vwreduce_uadd_v8i64(ptr %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v10
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1342,9 +1338,9 @@ define i64 @vwreduce_add_v16i64(ptr %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
-; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v12
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1375,9 +1371,9 @@ define i64 @vwreduce_uadd_v16i64(ptr %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vmv.s.x v12, zero
-; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v12
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 1cc09b7f5eeb5f4..a883ca5396ff31e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -341,9 +341,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: .LBB6_4: # %else6
; RV64-SLOW-NEXT: ret
; RV64-SLOW-NEXT: .LBB6_5: # %cond.store
-; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64-SLOW-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-SLOW-NEXT: vmv.x.s a1, v8
-; RV64-SLOW-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-SLOW-NEXT: vmv.x.s a2, v10
; RV64-SLOW-NEXT: srli a3, a1, 8
; RV64-SLOW-NEXT: sb a3, 1(a2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index c5245451dc44091..4319eb4e290b917 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -223,9 +223,9 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -238,9 +238,9 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -335,9 +335,9 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -350,9 +350,9 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -391,9 +391,9 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredusum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -406,9 +406,9 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwredosum.vs v8, v8, v10
; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vfmv.f.s fa0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
index 3f1892ede5678c0..8398fe4c32ffc3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
@@ -1019,9 +1019,9 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_add_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsum.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -1034,9 +1034,9 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
; CHECK-LABEL: vwreduce_uadd_nxv4i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v8, v8, v9
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8
@@ -1432,9 +1432,9 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
;
; RV64-LABEL: vwreduce_add_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1460,9 +1460,9 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
;
; RV64-LABEL: vwreduce_uadd_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vmv.s.x v9, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1684,9 +1684,9 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
;
; RV64-LABEL: vwreduce_add_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsum.vs v8, v8, v10
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
@@ -1712,9 +1712,9 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
;
; RV64-LABEL: vwreduce_uadd_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vmv.s.x v10, zero
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vwredsumu.vs v8, v8, v10
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v8
>From c131942c8a93cf66707bac76f8023b745ebb89e0 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 16 Oct 2023 23:31:46 -0400
Subject: [PATCH 2/3] Check that new ratio is valid
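For context, the guard added below rejects adjustments that would need an LMUL
outside the encodable mf8..m8 range. An illustrative check, reusing the
adjustedLMULInEighths() sketch from the first patch's description above (not
part of the patch itself):

  // e64 vmv.s.x followed by an e8, m8 config has a target SEW/LMUL ratio of
  // 1, so the adjusted LMUL would be 64, i.e. 512 eighths -- not encodable,
  // and the pass leaves the second vsetvli alone.
  std::optional<unsigned> LMUL = adjustedLMULInEighths(
      /*SEW=*/64, /*NextSEW=*/8, /*NextLMULInEighths=*/64);
  assert(!LMUL.has_value());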
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 2 +
.../test/CodeGen/RISCV/rvv/vsetvli-insert.mir | 54 +++++++++++++++++++
2 files changed, 56 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6724acab277b5a5..e00c171d9b27335 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1502,6 +1502,8 @@ canAdjustSEWLMULRatio(const MachineInstr &MI, const MachineInstr &NextMI,
unsigned SEW = MIInfo.getSEW() * 8;
// Fixed point value with 3 fractional bits.
unsigned NewRatio = SEW / NextMIInfo.getSEWLMULRatio();
+  if (NewRatio < 1 || NewRatio > 64)
+    return std::nullopt;
bool Fractional = NewRatio < 8;
RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index 7bda7a387c68f96..9318e7e695fd5f1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -72,6 +72,14 @@
ret void
}
+  define void @vmv_s_x() {
+    ret void
+  }
+
+  define void @vmv_s_x_too_large_ratio_diff() {
+    ret void
+  }
+
declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #4
@@ -446,3 +454,49 @@ body: |
%4:vr = PseudoVMV_V_I_MF4 undef %4, 0, 4, 3, 0
PseudoRET
...
+
+...
+---
+name: vmv_s_x
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x1
+    ; CHECK-LABEL: name: vmv_s_x
+    ; CHECK: liveins: $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x1
+    ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
+    ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 201 /* e16, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVMV_S_X_M1_:%[0-9]+]]:vr = PseudoVMV_S_X_M1 %pt, [[COPY]], 1, 4 /* e16 */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 192 /* e8, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVADD_VI_M1_:%[0-9]+]]:vr = PseudoVADD_VI_M1 %pt, [[PseudoVMV_S_X_M1_]], 1, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: PseudoRET
+    %0:gpr = COPY $x1
+    %pt:vr = IMPLICIT_DEF
+    %1:vr = PseudoVMV_S_X_M1 %pt, %0, 1, 4
+    %2:vr = PseudoVADD_VI_M1 %pt, %1, 1, 1, 3, 0
+    PseudoRET
+
+...
+---
+name: vmv_s_x_too_large_ratio_diff
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x1
+    ; CHECK-LABEL: name: vmv_s_x_too_large_ratio_diff
+    ; CHECK: liveins: $x1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x1
+    ; CHECK-NEXT: %pt:vrm8 = IMPLICIT_DEF
+    ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 219 /* e64, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVMV_S_X_M8_:%[0-9]+]]:vrm8 = PseudoVMV_S_X_M8 %pt, [[COPY]], 1, 6 /* e64 */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVADD_VI_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VI_M8 %pt, [[PseudoVMV_S_X_M8_]], 1, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: PseudoRET
+    %0:gpr = COPY $x1
+    %pt:vrm8 = IMPLICIT_DEF
+    %1:vrm8 = PseudoVMV_S_X_M8 %pt, %0, 1, 6
+    %2:vrm8 = PseudoVADD_VI_M8 %pt, %1, 1, 1, 3, 0
+    PseudoRET
>From 0ddba020b1b2a9a9a8219eaefd7dda4352537c20 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 17 Oct 2023 17:10:48 -0400
Subject: [PATCH 3/3] Update test outside of rvv tree
---
llvm/test/CodeGen/RISCV/double_reduct.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll
index cecaa9d24f8bccf..aa8a60fab18415d 100644
--- a/llvm/test/CodeGen/RISCV/double_reduct.ll
+++ b/llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -106,9 +106,9 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) {
define i16 @add_ext_v32i16(<32 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: add_ext_v32i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v11, zero
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vwredsumu.vs v10, v10, v11
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma