[llvm] 46dee4a - [RISCV][InsertVSETVLI] Split out demanded property for zero/non-zero of VL
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 3 14:47:32 PST 2023
Author: Philip Reames
Date: 2023-01-03T14:47:13-08:00
New Revision: 46dee4a3a3dfb372a0eaa0b4490c80be2f421f29
URL: https://github.com/llvm/llvm-project/commit/46dee4a3a3dfb372a0eaa0b4490c80be2f421f29
DIFF: https://github.com/llvm/llvm-project/commit/46dee4a3a3dfb372a0eaa0b4490c80be2f421f29.diff
LOG: [RISCV][InsertVSETVLI] Split out demanded property for zero/non-zero of VL
The scalar move instructions (vmv.s.x and vfmv.s.f) depend solely on whether VL is 0 or non-zero. By tracking that we only demand the zeroness of VL, and not its full value, we can allow VL to change across a scalar move. This helps eliminate vsetvli toggles.
Differential Revision: https://reviews.llvm.org/D140157
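For readers skimming the patch, here is a minimal, self-contained C++ sketch of the demanded-fields split this change introduces. The names (DemandedVL, avlChangeCompatible) are illustrative, not the pass's actual API: an instruction that only observes whether VL is zero stays compatible when VL changes between two non-zero values, while an instruction that observes the full VL does not.

// Illustrative sketch only; not the pass's actual code.
#include <cassert>
#include <cstdint>

struct DemandedVL {
  bool VLAny = false;      // exact VL value is observed
  bool VLZeroness = false; // only VL == 0 vs. VL != 0 is observed
};

// True if replacing CurAVL with NewAVL is invisible to an instruction with
// the given demanded properties.
static bool avlChangeCompatible(const DemandedVL &Used, uint64_t CurAVL,
                                uint64_t NewAVL) {
  if (Used.VLAny)
    return CurAVL == NewAVL;
  if (Used.VLZeroness)
    return (CurAVL == 0) == (NewAVL == 0);
  return true;
}

int main() {
  DemandedVL ScalarMove;        // vmv.s.x / vfmv.s.f style
  ScalarMove.VLZeroness = true;
  DemandedVL OrdinaryOp;        // typical VL-dependent operation
  OrdinaryOp.VLAny = OrdinaryOp.VLZeroness = true;

  // VL may change from 1 to 8 over the scalar move (both non-zero), which is
  // what lets the vsetvli toggles in the tests below be removed...
  assert(avlChangeCompatible(ScalarMove, 1, 8));
  // ...but not over an instruction that observes the full VL value.
  assert(!avlChangeCompatible(OrdinaryOp, 1, 8));
  return 0;
}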
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index bc36d062d27a..361bcc41e48c 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -126,7 +126,11 @@ static bool isMaskRegOp(const MachineInstr &MI) {
/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
- bool VL = false;
+ // Some unknown property of VL is used. If demanded, must preserve entire
+ // value.
+ bool VLAny = false;
+ // Only zero vs non-zero is used. If demanded, can change non-zero values.
+ bool VLZeroness = false;
bool SEW = false;
bool LMUL = false;
bool SEWLMULRatio = false;
@@ -138,6 +142,11 @@ struct DemandedFields {
return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
}
+ // Return true if any property of VL was used
+ bool usedVL() {
+ return VLAny || VLZeroness;
+ }
+
// Mark all VTYPE subfields and properties as demanded
void demandVTYPE() {
SEW = true;
@@ -146,6 +155,13 @@ struct DemandedFields {
TailPolicy = true;
MaskPolicy = true;
}
+
+ // Mark all VL properties as demanded
+ void demandVL() {
+ VLAny = true;
+ VLZeroness = true;
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Support for debugging, callable in GDB: V->dump()
LLVM_DUMP_METHOD void dump() const {
@@ -156,7 +172,8 @@ struct DemandedFields {
/// Implement operator<<.
void print(raw_ostream &OS) const {
OS << "{";
- OS << "VL=" << VL << ", ";
+ OS << "VLAny=" << VLAny << ", ";
+ OS << "VLZeroness=" << VLZeroness << ", ";
OS << "SEW=" << SEW << ", ";
OS << "LMUL=" << LMUL << ", ";
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
@@ -219,7 +236,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
DemandedFields Res;
// Start conservative if registers are used
if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
- Res.VL = true;
+    Res.demandVL();
if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
Res.demandVTYPE();
// Start conservative on the unlowered form too
@@ -227,7 +244,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
if (RISCVII::hasSEWOp(TSFlags)) {
Res.demandVTYPE();
if (RISCVII::hasVLOp(TSFlags))
- Res.VL = true;
+ Res.demandVL();
// Behavior is independent of mask policy.
if (!RISCVII::usesMaskPolicy(TSFlags))
@@ -264,6 +281,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
if (isScalarMoveInstr(MI)) {
Res.LMUL = false;
Res.SEWLMULRatio = false;
+ Res.VLAny = false;
}
return Res;
@@ -436,8 +454,12 @@ class VSETVLIInfo {
if (SEW == Require.SEW)
return true;
- if (Used.VL && !hasSameAVL(Require))
+ if (Used.VLAny && !hasSameAVL(Require))
+ return false;
+
+ if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
return false;
+
return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
}
@@ -787,12 +809,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
DemandedFields Used = getDemanded(MI);
- // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
- if (isScalarMoveInstr(MI) && CurInfo.hasEquallyZeroAVL(Require)) {
- Used.VL = false;
- // Additionally, if writing to an implicit_def operand, we don't need to
- // preserve any other bits and are thus compatible with any larger etype,
- // and can disregard policy bits. Warning: It's tempting to try doing
+ if (isScalarMoveInstr(MI)) {
+ // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
+    // need to preserve any other bits and are thus compatible with any larger
+    // etype, and can disregard policy bits. Warning: It's tempting to try doing
// this for any tail agnostic operation, but we can't as TA requires
// tail lanes to either be the original value or -1. We are writing
// unknown bits to the lanes here.
@@ -1204,7 +1224,8 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
}
static void doUnion(DemandedFields &A, DemandedFields B) {
- A.VL |= B.VL;
+ A.VLAny |= B.VLAny;
+ A.VLZeroness |= B.VLZeroness;
A.SEW |= B.SEW;
A.LMUL |= B.LMUL;
A.SEWLMULRatio |= B.SEWLMULRatio;
@@ -1212,6 +1233,13 @@ static void doUnion(DemandedFields &A, DemandedFields B) {
A.MaskPolicy |= B.MaskPolicy;
}
+static bool isNonZeroAVL(const MachineOperand &MO) {
+ if (MO.isReg())
+ return RISCV::X0 == MO.getReg();
+ assert(MO.isImm());
+ return 0 != MO.getImm();
+}
+
// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
@@ -1221,13 +1249,20 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
// demanded, or b) we can't rewrite the former to be the latter for
// implementation reasons.
if (!isVLPreservingConfig(MI)) {
- if (Used.VL)
+ if (Used.VLAny)
return false;
// TODO: Requires more care in the mutation...
if (isVLPreservingConfig(PrevMI))
return false;
+ // We don't bother to handle the equally zero case here as it's largely
+ // uninteresting.
+ if (Used.VLZeroness &&
+ (!isNonZeroAVL(MI.getOperand(1)) ||
+ !isNonZeroAVL(PrevMI.getOperand(1))))
+ return false;
+
// TODO: Track whether the register is defined between
// PrevMI and MI.
if (MI.getOperand(1).isReg() &&
@@ -1255,7 +1290,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
DemandedFields Used;
- Used.VL = true;
+ Used.demandVL();
Used.demandVTYPE();
SmallVector<MachineInstr*> ToDelete;
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
@@ -1268,10 +1303,10 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
Register VRegDef = MI.getOperand(0).getReg();
if (VRegDef != RISCV::X0 &&
!(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
- Used.VL = true;
+ Used.demandVL();
if (NextMI) {
- if (!Used.VL && !Used.usedVTYPE()) {
+ if (!Used.usedVL() && !Used.usedVTYPE()) {
ToDelete.push_back(&MI);
// Leave NextMI unchanged
continue;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 767dbc0efb1a..fb753f5a1a54 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -467,9 +467,8 @@ define void @buildvec_seq_v9i8(ptr %x) {
; RV32-NEXT: li a1, 3
; RV32-NEXT: sb a1, 8(a0)
; RV32-NEXT: li a1, 73
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vmv.v.i v9, 2
; RV32-NEXT: li a1, 36
; RV32-NEXT: vmv.s.x v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 8e0167c8d4a8..361612892160 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -201,9 +201,8 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV64-LABEL: vrgather_shuffle_vv_v8i64:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 5
-; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v16, a0
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vmv.s.x v16, a0
; RV64-NEXT: vmv.v.i v20, 2
; RV64-NEXT: lui a0, %hi(.LCPI11_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0)
@@ -386,9 +385,8 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 66
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
@@ -420,9 +418,8 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 67
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index a812c0a3af5f..6b7f581f3416 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -166,9 +166,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
; CHECK-LABEL: buildvec_mask_nonconst_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 3
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vand.vi v8, v8, 1
@@ -178,9 +177,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: li a2, 3
-; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32F-NEXT: vmv.s.x v0, a2
; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVE32F-NEXT: vmv.s.x v0, a2
; ZVE32F-NEXT: vmv.v.x v8, a1
; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
; ZVE32F-NEXT: vand.vi v8, v8, 1
@@ -295,9 +293,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
; CHECK-LABEL: buildvec_mask_nonconst_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 19
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vand.vi v8, v8, 1
@@ -307,9 +304,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1:
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: li a2, 19
-; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32F-NEXT: vmv.s.x v0, a2
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVE32F-NEXT: vmv.s.x v0, a2
; ZVE32F-NEXT: vmv.v.x v8, a1
; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
; ZVE32F-NEXT: vand.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 11b059a052b1..be781a983460 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2530,9 +2530,8 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -8445,9 +8444,8 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flw ft0, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 0515d611f36d..d6e4d879def1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1110,9 +1110,8 @@ define double @vreduce_fwadd_v32f64(ptr %x, double %s) {
; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vfredusum.vs v8, v24, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 4a0b023104ed..45e91c50447b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -1435,9 +1435,8 @@ define i64 @vwreduce_add_v32i64(ptr %x) {
; RV32-NEXT: vslidedown.vi v16, v8, 16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vwadd.vv v24, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, zero
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vmv.s.x v8, zero
; RV32-NEXT: vredsum.vs v8, v24, v8
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1454,9 +1453,8 @@ define i64 @vwreduce_add_v32i64(ptr %x) {
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwadd.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vredsum.vs v8, v24, v8
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1476,9 +1474,8 @@ define i64 @vwreduce_uadd_v32i64(ptr %x) {
; RV32-NEXT: vslidedown.vi v16, v8, 16
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vwaddu.vv v24, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, zero
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vmv.s.x v8, zero
; RV32-NEXT: vredsum.vs v8, v24, v8
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1495,9 +1492,8 @@ define i64 @vwreduce_uadd_v32i64(ptr %x) {
; RV64-NEXT: vslidedown.vi v16, v8, 16
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vwaddu.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vmv.s.x v8, zero
; RV64-NEXT: vredsum.vs v8, v24, v8
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
index 0141d002194a..2326db8a3aa1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
@@ -4,9 +4,8 @@
define i64 @reduce_add(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_add:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -34,9 +33,8 @@ entry:
define i64 @reduce_and(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_and:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredand.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -64,9 +62,8 @@ entry:
define i64 @reduce_or(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_or:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -94,9 +91,8 @@ entry:
define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_xor:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -109,9 +105,8 @@ entry:
define i64 @reduce_xor2(<4 x i64> %v) {
; CHECK-LABEL: reduce_xor2:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 8
@@ -125,9 +120,8 @@ entry:
define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -155,9 +149,8 @@ entry:
define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_umin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredminu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -185,9 +178,8 @@ entry:
define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smax:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredmax.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -215,9 +207,8 @@ entry:
define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
; CHECK-LABEL: reduce_smin:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vredmin.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 972298031e2f..aafa5de69d50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -199,9 +199,8 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -212,9 +211,8 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -313,9 +311,8 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -326,9 +323,8 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -371,9 +367,8 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -384,9 +379,8 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
; CHECK-LABEL: vreduce_ord_fadd_nxv4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -584,9 +578,8 @@ define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -662,9 +655,8 @@ define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI45_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI45_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -679,9 +671,8 @@ define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI46_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -866,9 +857,8 @@ define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI58_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -944,9 +934,8 @@ define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI63_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI63_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -961,9 +950,8 @@ define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI64_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI64_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vfredmax.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1040,9 +1028,8 @@ define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1068,9 +1055,8 @@ define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, fa0
; CHECK-NEXT: vfredosum.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1132,9 +1118,8 @@ define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-NEXT: vfmv.v.f v10, ft0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
@@ -1160,9 +1145,8 @@ define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
; CHECK-NEXT: vslideup.vi v11, v12, 0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
; CHECK-NEXT: vfredmin.vs v8, v8, v12
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
index 7941ab512192..ac7b927ff723 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
@@ -1126,9 +1126,8 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_add_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1171,9 +1170,8 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_umax_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vredmaxu.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1187,9 +1185,8 @@ define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vredmax.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1219,9 +1216,8 @@ define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; RV32: # %bb.0:
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT: vredmin.vs v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: ret
@@ -1230,9 +1226,8 @@ define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
; RV64: # %bb.0:
; RV64-NEXT: lui a0, 524288
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1260,9 +1255,8 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_or_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vredor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1275,9 +1269,8 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
; CHECK-LABEL: vreduce_xor_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
; CHECK-NEXT: vredxor.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
@@ -1597,9 +1590,8 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
;
; RV64-LABEL: vreduce_add_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vredsum.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1681,9 +1673,8 @@ define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
;
; RV64-LABEL: vreduce_umax_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vredmaxu.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1718,9 +1709,8 @@ define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1785,9 +1775,8 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1841,9 +1830,8 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
;
; RV64-LABEL: vreduce_or_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vredor.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1869,9 +1857,8 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
;
; RV64-LABEL: vreduce_xor_nxv2i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
; RV64-NEXT: vredxor.vs v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1897,9 +1884,8 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
;
; RV64-LABEL: vreduce_add_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vredsum.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -1981,9 +1967,8 @@ define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
;
; RV64-LABEL: vreduce_umax_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vredmaxu.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2018,9 +2003,8 @@ define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmax.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2085,9 +2069,8 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; RV64: # %bb.0:
; RV64-NEXT: li a0, -1
; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vredmin.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2141,9 +2124,8 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
;
; RV64-LABEL: vreduce_or_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vredor.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
@@ -2169,9 +2151,8 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
;
; RV64-LABEL: vreduce_xor_nxv4i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
; RV64-NEXT: vredxor.vs v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret