[llvm] 603ba4c - [RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (#88295)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 24 01:31:44 PDT 2024
Author: Luke Lau
Date: 2024-04-24T16:31:40+08:00
New Revision: 603ba4c59635ff5f8a54d1106719124aee20271d
URL: https://github.com/llvm/llvm-project/commit/603ba4c59635ff5f8a54d1106719124aee20271d
DIFF: https://github.com/llvm/llvm-project/commit/603ba4c59635ff5f8a54d1106719124aee20271d.diff
LOG: [RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (#88295)
This patch splits off part of the work to move vsetvli insertion to post
regalloc in #70549.
doLocalPostpass operates outside of RISCVInsertVSETVLI's dataflow, so we can
move it into its own pass, and then move that pass to after vector register
allocation, which should be a smaller change. A condensed sketch of how the
new pass is hooked up follows.
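
For reference, here is a condensed sketch of how the new pass gets hooked up,
pulled together from the diff below (declaration in RISCV.h, scheduling in
RISCVTargetMachine.cpp); the full pass implementation is in
RISCVInsertVSETVLI.cpp further down:

  // RISCV.h: declare the factory and initializer for the new pass.
  FunctionPass *createRISCVCoalesceVSETVLIPass();
  void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);

  // RISCVTargetMachine.cpp: run it right after vector regalloc and the
  // virtual register rewriter on the optimized path (the fast path is
  // analogous).
  bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
    addPass(createRVVRegAllocPass(true));
    addPass(createVirtRegRewriter(false));
    addPass(createRISCVCoalesceVSETVLIPass());
    return TargetPassConfig::addRegAssignAndRewriteOptimized();
  }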
A couple of things that are different from #70549:
- This manually fixes up the LiveIntervals rather than recomputing them via
createAndComputeVirtRegInterval. I'm not sure there's much of a difference
either way; see the sketch after this list.
- For the postpass it's sufficient to just check isUndef() in
hasUndefinedMergeOp, i.e. we don't need to look up the def in VNInfo.
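
To illustrate the LiveIntervals point, here is a hedged sketch (not verbatim
from the patch; the helper name and signature are invented for illustration)
of the two alternatives once the def of DefReg has been moved from NextMI up
to MI:

  #include "llvm/CodeGen/LiveIntervals.h"
  #include "llvm/CodeGen/MachineInstr.h"
  using namespace llvm;

  // Hypothetical helper, for illustration only.
  static void updateLiveIntervalForMovedDef(LiveIntervals &LIS,
                                            Register DefReg,
                                            MachineInstr &MI) {
    // Alternative A (what #70549 does): drop the interval and recompute it
    // from scratch from the machine IR.
    //   LIS.removeInterval(DefReg);
    //   LIS.createAndComputeVirtRegInterval(DefReg);

    // Alternative B (what this patch does): extend the existing interval up
    // to MI's def slot by hand, then shrink it in case DefReg had no uses.
    LiveInterval &DefLI = LIS.getInterval(DefReg);
    SlotIndex MISlot = LIS.getInstructionIndex(MI).getRegSlot();
    VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
    DefLI.addSegment(LiveInterval::Segment(MISlot, DefLI.beginIndex(), DefVNI));
    DefVNI->def = MISlot;
    LIS.shrinkToUses(&DefLI);
  }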
Running on llvm-test-suite and SPEC CPU 2017 there are no changes in the
number of vsetvlis removed. There are some minor scheduling diffs, as well as
extra spills in some cases and fewer spills in others (caused by transient
vsetvlis existing between RISCVInsertVSETVLI and RISCVCoalesceVSETVLI while
vector regalloc happens), but they should go away once we finish moving the
rest of RISCVInsertVSETVLI.
We could also potentially turn off this pass for unoptimised builds; a rough
sketch of one way to do that follows.
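
Not part of this patch, but as a rough sketch of that last point, the
coalescing pass could presumably be gated on the optimisation level in
RISCVTargetMachine.cpp; the getOptLevel() guard here is an assumption, not
something the patch does:

  // Hypothetical sketch: skip RISCVCoalesceVSETVLI for unoptimised builds by
  // only adding it when we are not compiling at -O0.
  bool RISCVPassConfig::addRegAssignAndRewriteFast() {
    addPass(createRVVRegAllocPass(false));
    if (getOptLevel() != CodeGenOptLevel::None) // assumption: gate on opt level
      addPass(createRISCVCoalesceVSETVLIPass());
    return TargetPassConfig::addRegAssignAndRewriteFast();
  }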
Added:
Modified:
llvm/lib/Target/RISCV/RISCV.h
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/O0-pipeline.ll
llvm/test/CodeGen/RISCV/O3-pipeline.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
llvm/test/CodeGen/RISCV/rvv/vmfge.ll
llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
llvm/test/CodeGen/RISCV/rvv/vmfle.ll
llvm/test/CodeGen/RISCV/rvv/vmflt.ll
llvm/test/CodeGen/RISCV/rvv/vmfne.ll
llvm/test/CodeGen/RISCV/rvv/vmseq.ll
llvm/test/CodeGen/RISCV/rvv/vmsge.ll
llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
llvm/test/CodeGen/RISCV/rvv/vmsle.ll
llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
llvm/test/CodeGen/RISCV/rvv/vmslt.ll
llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
llvm/test/CodeGen/RISCV/rvv/vmsne.ll
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 7af543f018ccbd..d405395dcf9ec4 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
+FunctionPass *createRISCVCoalesceVSETVLIPass();
+void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);
+
FunctionPass *createRISCVPostRAExpandPseudoPass();
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertReadWriteCSRPass();
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 331253e39c0acb..635932edb5f8ac 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -27,16 +27,19 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;
#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
+#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
-STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
+STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -190,6 +193,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
if (UseMO.getReg() == RISCV::NoRegister)
return true;
+ if (UseMO.isUndef())
+ return true;
+ if (UseMO.getReg().isPhysical())
+ return false;
+
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
@@ -778,11 +786,40 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
- void doLocalPostpass(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
};
+class RISCVCoalesceVSETVLI : public MachineFunctionPass {
+public:
+ static char ID;
+ const RISCVSubtarget *ST;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+ RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addPreserved<LiveStacks>();
+
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
+
+private:
+ bool coalesceVSETVLIs(MachineBasicBlock &MBB);
+};
+
} // end anonymous namespace
char RISCVInsertVSETVLI::ID = 0;
@@ -790,6 +827,11 @@ char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
+char RISCVCoalesceVSETVLI::ID = 0;
+
+INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
+ RISCV_COALESCE_VSETVLI_NAME, false, false)
+
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1511,12 +1553,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
auto &AVL = MI.getOperand(1);
auto &PrevAVL = PrevMI.getOperand(1);
- assert(MRI.isSSA());
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
- (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
+ (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
+ PrevAVL.getReg() != AVL.getReg()))
return false;
}
@@ -1526,7 +1568,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
return areCompatibleVTYPEs(PriorVType, VType, Used);
}
-void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
+bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
@@ -1558,8 +1600,28 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
- MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
+ Register DefReg = NextMI->getOperand(0).getReg();
+
+ MI.getOperand(0).setReg(DefReg);
MI.getOperand(0).setIsDead(false);
+
+ // The def of DefReg moved to MI, so extend the LiveInterval up to
+ // it.
+ if (DefReg.isVirtual()) {
+ LiveInterval &DefLI = LIS->getInterval(DefReg);
+ SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
+ VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
+ LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
+ DefLI.addSegment(S);
+ DefVNI->def = MISlot;
+ // Mark DefLI as spillable if it was previously unspillable
+ DefLI.setWeight(0);
+
+ // DefReg may have had no uses, in which case we need to shrink
+ // the LiveInterval up to MI.
+ LIS->shrinkToUses(&DefLI);
+ }
+
Register OldVLReg;
if (MI.getOperand(1).isReg())
OldVLReg = MI.getOperand(1).getReg();
@@ -1567,11 +1629,20 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
+
+ // Clear NextMI's AVL early so we're not counting it as a use.
+ if (NextMI->getOperand(1).isReg())
+ NextMI->getOperand(1).setReg(RISCV::NoRegister);
+
if (OldVLReg) {
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg))
VLOpDef->eraseFromParent();
+
+ // NextMI no longer uses OldVLReg so shrink its LiveInterval.
+ if (OldVLReg.isVirtual())
+ LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
}
MI.setDesc(NextMI->getDesc());
}
@@ -1584,9 +1655,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
Used = getDemanded(MI, MRI, ST);
}
- NumRemovedVSETVL += ToDelete.size();
- for (auto *MI : ToDelete)
+ NumCoalescedVSETVL += ToDelete.size();
+ for (auto *MI : ToDelete) {
+ LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
+ }
+
+ return !ToDelete.empty();
}
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
@@ -1661,15 +1736,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);
- // Now that all vsetvlis are explicit, go through and do block local
- // DSE and peephole based demanded fields based transforms. Note that
- // this *must* be done outside the main dataflow so long as we allow
- // any cross block analysis within the dataflow. We can't have both
- // demanded fields based mutation and non-local analysis in the
- // dataflow at the same time without introducing inconsistencies.
- for (MachineBasicBlock &MBB : MF)
- doLocalPostpass(MBB);
-
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
@@ -1683,3 +1749,29 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}
+
+// Now that all vsetvlis are explicit, go through and do block local
+// DSE and peephole based demanded fields based transforms. Note that
+// this *must* be done outside the main dataflow so long as we allow
+// any cross block analysis within the dataflow. We can't have both
+// demanded fields based mutation and non-local analysis in the
+// dataflow at the same time without introducing inconsistencies.
+bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+ // Skip if the vector extension is not enabled.
+ ST = &MF.getSubtarget<RISCVSubtarget>();
+ if (!ST->hasVInstructions())
+ return false;
+ TII = ST->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF)
+ Changed |= coalesceVSETVLIs(MBB);
+
+ return Changed;
+}
+
+FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
+ return new RISCVCoalesceVSETVLI();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 34ddd635231087..0876f46728a10c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -116,6 +116,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
+ initializeRISCVCoalesceVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
@@ -388,12 +389,14 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
+ addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteFast();
}
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
+ addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index faf37545e1a117..56bd4bd0c08f09 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -47,6 +47,10 @@
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Slot index numbering
+; CHECK-NEXT: Live Interval Analysis
+; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
; CHECK-NEXT: Fixup Statepoint Caller Saved
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 90472f246918f3..4121d111091117 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -143,6 +143,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
+; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Virtual Register Map
; CHECK-NEXT: Live Register Matrix
; CHECK-NEXT: Greedy Register Allocator
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 8e214e40547832..9e83efd3519539 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index 6bfd0ac932672f..ed152e64a91ef4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
@@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
-; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
+; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index 85b849045e8cee..a8e4af2d7368e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
@@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
+; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
@@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
+; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
@@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
+; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 6da83644413bc2..40ff8b50d99d8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
@@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
-; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
+; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
@@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; V128-NEXT: vid.v v8
; V128-NEXT: vsrl.vi v8, v8, 1
-; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vadd.vi v8, v8, 1
+; V128-NEXT: vmv.v.i v0, 10
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
; V128-NEXT: vmv.v.v v8, v10
; V128-NEXT: ret
@@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
; V512-NEXT: vid.v v8
; V512-NEXT: vsrl.vi v8, v8, 1
-; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vadd.vi v8, v8, 1
+; V512-NEXT: vmv.v.i v0, 10
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 0e8d9cf0306690..58af6ac246d161 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -89,8 +89,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v11, (a0)
-; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vrgather.vv v10, v8, v11
+; CHECK-NEXT: vmv.v.i v0, 8
; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -162,16 +162,16 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v16, 2
-; RV32-NEXT: li a0, 5
-; RV32-NEXT: vslide1down.vx v20, v16, a0
; RV32-NEXT: lui a0, %hi(.LCPI11_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0)
-; RV32-NEXT: vle16.v v21, (a0)
+; RV32-NEXT: vle16.v v20, (a0)
+; RV32-NEXT: li a0, 5
+; RV32-NEXT: vslide1down.vx v21, v16, a0
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV32-NEXT: vrgatherei16.vv v16, v8, v20
; RV32-NEXT: li a0, 164
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgatherei16.vv v16, v8, v21
-; RV32-NEXT: vrgatherei16.vv v16, v12, v20, v0.t
+; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -210,13 +210,13 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: vmv.v.i v20, -1
+; RV32-NEXT: vrgatherei16.vv v12, v20, v16
; RV32-NEXT: lui a0, %hi(.LCPI12_1)
; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1)
-; RV32-NEXT: vle16.v v17, (a0)
+; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: li a0, 113
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgatherei16.vv v12, v20, v16
-; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t
+; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -368,9 +368,9 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -387,9 +387,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT: vmv.s.x v11, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -422,9 +422,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v11, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -441,9 +441,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v11, v10, 2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: li a0, 70
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -464,9 +464,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: li a0, 98
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -681,9 +681,9 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w
; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
@@ -695,12 +695,12 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index f98cb343a2ab42..99364264de8293 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -159,16 +159,17 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 56
+; RV32-NEXT: li a3, 54
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x36, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 54 * vlenb
; RV32-NEXT: addi a3, a1, 256
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v16, (a3)
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: li a4, 21
+; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
@@ -176,31 +177,30 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vslideup.vi v8, v16, 4
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: slli a5, a4, 3
+; RV32-NEXT: add a4, a5, a4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 12
-; RV32-NEXT: vmv.s.x v3, a4
+; RV32-NEXT: vmv.s.x v0, a4
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v16, v16, 16
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 24
+; RV32-NEXT: li a5, 37
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: vmv1r.v v0, v3
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 2
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs1r.v v3, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 20
-; RV32-NEXT: mul a4, a4, a5
+; RV32-NEXT: slli a5, a4, 4
+; RV32-NEXT: add a4, a5, a4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
@@ -209,71 +209,82 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
; RV32-NEXT: vle16.v v8, (a4)
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a4, a4, 3
+; RV32-NEXT: li a5, 13
+; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: lui a4, %hi(.LCPI6_1)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1)
-; RV32-NEXT: lui a5, 1
-; RV32-NEXT: vle16.v v8, (a4)
-; RV32-NEXT: addi a4, sp, 16
-; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vle32.v v16, (a1)
+; RV32-NEXT: vle32.v v24, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 40
+; RV32-NEXT: li a4, 45
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vle32.v v24, (a3)
+; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, %hi(.LCPI6_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI6_1)
+; RV32-NEXT: lui a4, 1
+; RV32-NEXT: addi a4, a4, -64
+; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a5, a1, 2
+; RV32-NEXT: add a1, a5, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: addi a1, a5, -64
-; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vle32.v v16, (a3)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
+; RV32-NEXT: li a3, 29
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vmv.s.x v2, a4
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: li a3, 13
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v4
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; RV32-NEXT: vrgatherei16.vv v8, v24, v4
+; RV32-NEXT: vmv1r.v v0, v2
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 20
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a3, a1, 4
+; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmv.v.v v12, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 20
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a3, a1, 4
+; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a3, 21
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmv4r.v v16, v8
; RV32-NEXT: vslideup.vi v8, v16, 2
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 37
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -283,36 +294,45 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a1, %hi(.LCPI6_2)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: lui a3, %hi(.LCPI6_3)
-; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3)
-; RV32-NEXT: vle16.v v24, (a1)
-; RV32-NEXT: vle16.v v8, (a3)
+; RV32-NEXT: vle16.v v8, (a1)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 13
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, %hi(.LCPI6_3)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI6_3)
+; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 40
+; RV32-NEXT: li a3, 45
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v0, v24
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
+; RV32-NEXT: li a3, 13
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v4
+; RV32-NEXT: vmv1r.v v0, v2
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 29
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
@@ -320,8 +340,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v20, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 12
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a3, a1, 2
+; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
@@ -330,171 +350,178 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a3, 21
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v12, v24, v8
+; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: li a3, 13
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv1r.v v0, v3
-; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t
-; RV32-NEXT: vmv.v.v v4, v12
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_5)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: lui a3, %hi(.LCPI6_6)
-; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6)
; RV32-NEXT: vle16.v v24, (a1)
-; RV32-NEXT: vle16.v v8, (a3)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: li a1, 960
-; RV32-NEXT: vmv.s.x v2, a1
+; RV32-NEXT: lui a1, %hi(.LCPI6_6)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI6_6)
+; RV32-NEXT: li a3, 960
+; RV32-NEXT: vle16.v v4, (a1)
+; RV32-NEXT: vmv.s.x v0, a3
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 40
+; RV32-NEXT: li a3, 45
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v16, v24
-; RV32-NEXT: vmv1r.v v0, v2
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 29
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t
+; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: li a3, 13
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
-; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
-; RV32-NEXT: vmv.v.v v4, v8
+; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmv.v.v v12, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: li a3, 13
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_7)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7)
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a3, 21
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v28, v24, v8
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v4, v16, v8
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 37
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v28, v8, 4, v0.t
-; RV32-NEXT: vmv.v.v v4, v28
+; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t
; RV32-NEXT: lui a1, %hi(.LCPI6_8)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: lui a3, %hi(.LCPI6_9)
-; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9)
-; RV32-NEXT: vle16.v v28, (a1)
-; RV32-NEXT: vle16.v v24, (a3)
+; RV32-NEXT: vle16.v v0, (a1)
+; RV32-NEXT: lui a1, %hi(.LCPI6_9)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9)
+; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v28
-; RV32-NEXT: vmv1r.v v0, v2
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 45
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
+; RV32-NEXT: vrgatherei16.vv v8, v16, v0
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v4, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: lui a1, %hi(.LCPI6_10)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10)
-; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: lui a1, 15
-; RV32-NEXT: vmv.s.x v6, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a3, 21
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v28, v24, 6
-; RV32-NEXT: vmv1r.v v0, v6
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vslideup.vi v12, v8, 6
+; RV32-NEXT: lui a1, %hi(.LCPI6_10)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10)
+; RV32-NEXT: vle16.v v8, (a1)
+; RV32-NEXT: lui a1, 15
+; RV32-NEXT: vmv.s.x v24, a1
+; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 37
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v28, v8, v4, v0.t
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
+; RV32-NEXT: vmv.v.v v28, v12
; RV32-NEXT: lui a1, %hi(.LCPI6_11)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: lui a3, %hi(.LCPI6_12)
-; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12)
; RV32-NEXT: vle16.v v0, (a1)
-; RV32-NEXT: vle16.v v24, (a3)
-; RV32-NEXT: li a1, 1008
-; RV32-NEXT: vmv.s.x v7, a1
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, %hi(.LCPI6_12)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI6_12)
+; RV32-NEXT: li a3, 1008
+; RV32-NEXT: vle16.v v4, (a1)
+; RV32-NEXT: vmv.s.x v25, a3
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 40
+; RV32-NEXT: li a3, 45
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v16, v0
-; RV32-NEXT: vmv1r.v v0, v7
+; RV32-NEXT: vmv1r.v v0, v25
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: li a3, 29
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
+; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v8
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 21
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_13)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13)
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: vmv1r.v v0, v6
+; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: slli a3, a1, 3
+; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 24
+; RV32-NEXT: li a3, 37
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -503,79 +530,70 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a1, %hi(.LCPI6_14)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_14)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: lui a2, %hi(.LCPI6_15)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15)
; RV32-NEXT: vle16.v v16, (a1)
-; RV32-NEXT: vle16.v v8, (a2)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 24
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, %hi(.LCPI6_15)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15)
+; RV32-NEXT: vle16.v v28, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
+; RV32-NEXT: li a2, 45
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v0, v16
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 48
+; RV32-NEXT: li a2, 29
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 24
-; RV32-NEXT: mul a1, a1, a2
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t
+; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v24, v8
; RV32-NEXT: addi a1, a0, 320
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v24, (a1)
; RV32-NEXT: addi a1, a0, 256
-; RV32-NEXT: vse32.v v28, (a1)
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 21
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: addi a1, a0, 192
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 2
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: addi a1, a0, 128
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: li a3, 13
+; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: addi a1, a0, 64
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 12
-; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: add a2, a3, a2
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 20
-; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 56
+; RV32-NEXT: li a1, 54
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
@@ -586,320 +604,324 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 52
+; RV64-NEXT: li a3, 56
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x34, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 52 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: addi a2, a1, 256
; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 27
-; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: slli a2, a2, 5
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: addi a2, a1, 128
; RV64-NEXT: vle64.v v8, (a2)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 35
+; RV64-NEXT: li a3, 40
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vle64.v v8, (a1)
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 43
-; RV64-NEXT: mul a1, a1, a2
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vle64.v v24, (a1)
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vrgather.vi v8, v16, 4
; RV64-NEXT: li a1, 128
-; RV64-NEXT: vmv.s.x v0, a1
+; RV64-NEXT: vmv.s.x v4, a1
; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; RV64-NEXT: vslidedown.vi v24, v16, 8
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vmv1r.v v28, v0
+; RV64-NEXT: vslidedown.vi v16, v16, 8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 24
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vmv1r.v v0, v4
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 19
+; RV64-NEXT: li a2, 20
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vmv.v.v v4, v8
+; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t
+; RV64-NEXT: vmv.v.v v20, v8
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: li a1, 6
; RV64-NEXT: vid.v v8
-; RV64-NEXT: vmul.vx v2, v8, a1
+; RV64-NEXT: vmul.vx v6, v8, a1
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vrgatherei16.vv v8, v24, v6
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 43
+; RV64-NEXT: li a2, 48
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v2
+; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: li a1, 56
-; RV64-NEXT: vmv.s.x v1, a1
-; RV64-NEXT: vadd.vi v30, v2, -16
+; RV64-NEXT: vmv.s.x v5, a1
+; RV64-NEXT: vadd.vi v16, v6, -16
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: vmv1r.v v0, v5
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 35
+; RV64-NEXT: li a2, 40
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v30, v0.t
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v4, v8
+; RV64-NEXT: vmv.v.v v20, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 4
-; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 27
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a1, a1, 5
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v4, v16, 5
-; RV64-NEXT: vmv1r.v v0, v28
-; RV64-NEXT: vrgather.vi v4, v24, 3, v0.t
-; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs2r.v v2, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vadd.vi v16, v2, 1
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vrgather.vi v24, v16, 5
+; RV64-NEXT: vmv1r.v v0, v4
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 43
+; RV64-NEXT: li a2, 24
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v24, v16
-; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v16, v2, -15
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vadd.vi v28, v6, 1
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 11
+; RV64-NEXT: li a2, 48
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs2r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v28
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT: vadd.vi v28, v6, -15
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: vmv1r.v v0, v5
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 35
+; RV64-NEXT: li a2, 40
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
+; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
+; RV64-NEXT: vmv.v.v v24, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 11
+; RV64-NEXT: li a2, 12
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v2, v0.t
-; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v4, v8
+; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vmv2r.v v26, v6
+; RV64-NEXT: vadd.vi v24, v6, 2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 11
+; RV64-NEXT: li a2, 48
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vadd.vi v6, v2, 2
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v8, v24, v6
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v0, v24
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: li a1, 24
-; RV64-NEXT: vmv.s.x v7, a1
-; RV64-NEXT: vadd.vi v26, v2, -14
+; RV64-NEXT: vmv.s.x v0, a1
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vadd.vi v24, v26, -14
+; RV64-NEXT: vmv2r.v v6, v26
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v7
-; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t
+; RV64-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, 6
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 27
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a1, a1, 5
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v16, v24, v12
+; RV64-NEXT: vrgatherei16.vv v20, v24, v12
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 20
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl1r.v v6, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmv1r.v v0, v6
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 19
+; RV64-NEXT: li a2, 24
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v16, v24, 4, v0.t
+; RV64-NEXT: vrgather.vi v20, v24, 4, v0.t
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v16, v8
+; RV64-NEXT: vmv.v.v v20, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 3
-; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v28, v2, 3
+; RV64-NEXT: vmv2r.v v10, v6
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs2r.v v6, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vadd.vi v8, v6, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 43
+; RV64-NEXT: li a2, 48
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v28
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v16, v0, v8
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v28, v2, -13
+; RV64-NEXT: vadd.vi v28, v10, -13
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 35
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 40
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
+; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v16, v8, v28, v0.t
; RV64-NEXT: lui a1, 16
; RV64-NEXT: addi a1, a1, 7
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 27
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a1, a1, 5
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v16, v24, v12
-; RV64-NEXT: vmv1r.v v0, v6
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv4r.v v8, v0
+; RV64-NEXT: vrgatherei16.vv v20, v0, v12
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 19
+; RV64-NEXT: li a2, 20
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v16, v24, 5, v0.t
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v16, v8
+; RV64-NEXT: vmv.v.v v20, v16
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 1
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: li a2, 20
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, 96
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a1
+; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: li a1, 192
; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vrgather.vi v28, v8, 2
+; RV64-NEXT: vrgatherei16.vv v28, v24, v12, v0.t
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 27
+; RV64-NEXT: li a2, 6
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v4, v16, 2
-; RV64-NEXT: vrgatherei16.vv v4, v24, v8, v0.t
-; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v16, v2, 4
+; RV64-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vadd.vi v16, v24, 4
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 43
+; RV64-NEXT: li a2, 48
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v24, v16
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v0, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: li a1, 28
-; RV64-NEXT: vmv.s.x v1, a1
-; RV64-NEXT: vadd.vi v16, v2, -12
+; RV64-NEXT: vmv.s.x v0, a1
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vadd.vi v26, v24, -12
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 35
+; RV64-NEXT: li a2, 40
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v4, v8
+; RV64-NEXT: vmv.v.v v28, v8
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, 112
; RV64-NEXT: addi a1, a1, 1
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 27
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a1, a1, 5
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgather.vi v8, v16, 3
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 19
+; RV64-NEXT: li a2, 24
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgatherei16.vv v8, v16, v12, v0.t
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v12, v2, 5
+; RV64-NEXT: vadd.vi v12, v24, 5
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 43
+; RV64-NEXT: li a2, 48
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v16, v24, v12
+; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v16, v0, v12
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v12, v2, -11
+; RV64-NEXT: vadd.vi v12, v24, -11
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v1
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 35
+; RV64-NEXT: li a2, 40
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
@@ -911,40 +933,43 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: vse64.v v4, (a1)
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 192
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 1
-; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: li a3, 20
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a3, a2, 3
-; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: slli a2, a2, 3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 64
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 11
+; RV64-NEXT: li a3, 12
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a2, a1, 4
-; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 52
+; RV64-NEXT: li a1, 56
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index c295fed2c28c10..023d707f07bff7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -286,8 +286,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -303,8 +303,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -331,8 +331,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -348,8 +348,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -375,8 +375,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -391,8 +391,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 4f7b885d998e5b..7fc442c88d101b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -25,10 +25,10 @@ define void @splat_v1i1(ptr %x, i1 %y) {
; CHECK-LABEL: splat_v1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.s.x v8, a1
-; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: andi a1, a1, 1
+; CHECK-NEXT: vmv.s.x v9, a1
+; CHECK-NEXT: vmsne.vi v0, v9, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 9fbc22221f99bd..539a8403c93521 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -12728,8 +12728,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
-; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-NEXT: ret
;
@@ -12803,8 +12803,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64V-NEXT: vmv.v.x v8, a3
; RV64V-NEXT: vslide1down.vx v8, v8, a5
; RV64V-NEXT: vslide1down.vx v8, v8, a6
-; RV64V-NEXT: vmv.v.i v0, 15
; RV64V-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-NEXT: vmv.v.i v0, 15
; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64V-NEXT: addi sp, s0, -128
; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
@@ -12854,8 +12854,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: vmv.v.x v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -12896,8 +12896,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
@@ -12941,8 +12941,8 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15>
@@ -12986,8 +12986,8 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
@@ -13031,8 +13031,8 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
@@ -13074,8 +13074,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
@@ -13120,8 +13120,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
@@ -13167,8 +13167,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
@@ -13217,8 +13217,8 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
@@ -13264,8 +13264,8 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
@@ -13320,8 +13320,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
@@ -13367,8 +13367,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 2a0ec47a3de01c..5f456c7824316b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -221,10 +221,10 @@ define i32 @reduce_sum_16xi32_prefix7(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vslideup.vi v8, v10, 7
-; CHECK-NEXT: vredsum.vs v8, v8, v10
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vslideup.vi v10, v8, 7
+; CHECK-NEXT: vredsum.vs v8, v10, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -248,9 +248,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, zero
-; CHECK-NEXT: vredsum.vs v8, v8, v10
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vredsum.vs v8, v10, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -670,15 +670,15 @@ define i32 @reduce_smax_16xi32_prefix5(ptr %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: vslideup.vi v10, v8, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: vslideup.vi v10, v8, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 7
-; CHECK-NEXT: vredmax.vs v8, v8, v8
+; CHECK-NEXT: vslideup.vi v10, v8, 7
+; CHECK-NEXT: vredmax.vs v8, v10, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -715,15 +715,15 @@ define i32 @reduce_smin_16xi32_prefix5(ptr %p) {
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: vslideup.vi v10, v8, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: vslideup.vi v10, v8, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 7
-; CHECK-NEXT: vredmin.vs v8, v8, v8
+; CHECK-NEXT: vslideup.vi v10, v8, 7
+; CHECK-NEXT: vredmin.vs v8, v10, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -830,9 +830,9 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_fadd_16xf32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v9, zero
-; CHECK-NEXT: vfredusum.vs v8, v8, v9
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vfredusum.vs v8, v9, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <16 x float>, ptr %p, align 256
@@ -847,15 +847,15 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 5
+; CHECK-NEXT: vslideup.vi v10, v8, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: vslideup.vi v10, v8, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 7
-; CHECK-NEXT: vfredusum.vs v8, v8, v10
+; CHECK-NEXT: vslideup.vi v10, v8, 7
+; CHECK-NEXT: vfredusum.vs v8, v10, v8
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <16 x float>, ptr %p, align 256
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index 032d32109933f3..ab7da9e0faf2b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -49,8 +49,8 @@ define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) {
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrsub.vi v8, v11, 3
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -174,8 +174,8 @@ define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) {
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrsub.vi v8, v11, 3
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
@@ -492,8 +492,8 @@ define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) {
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrsub.vi v8, v11, 3
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 9a5e86d61c265e..922692ed88c9f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -107,14 +107,14 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs
; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: vsetvli a3, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vand.vi v26, v24, 1
-; CHECK-NEXT: vmsne.vi v28, v26, 0
-; CHECK-NEXT: vsrl.vi v24, v24, 1
+; CHECK-NEXT: vsrl.vi v26, v24, 1
+; CHECK-NEXT: vand.vi v24, v24, 1
+; CHECK-NEXT: vmsne.vi v28, v24, 0
; CHECK-NEXT: vmv1r.v v0, v28
-; CHECK-NEXT: vadd.vx v24, v24, a2, v0.t
+; CHECK-NEXT: vadd.vx v26, v26, a2, v0.t
; CHECK-NEXT: vmv4r.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v0, v8, v24
+; CHECK-NEXT: vrgatherei16.vv v0, v8, v26
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
@@ -123,7 +123,7 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v8, v16, v24
+; CHECK-NEXT: vrgatherei16.vv v8, v16, v26
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vs8r.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 4b6ad0f27214d0..327e18e913819e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -122,9 +122,9 @@ define <vscale x 4 x i64> @vector_interleave_nxv4i64_nxv2i64(<vscale x 2 x i64>
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vand.vi v13, v12, 1
-; CHECK-NEXT: vmsne.vi v0, v13, 0
; CHECK-NEXT: vsrl.vi v16, v12, 1
+; CHECK-NEXT: vand.vi v12, v12, 1
+; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
@@ -137,9 +137,9 @@ define <vscale x 4 x i64> @vector_interleave_nxv4i64_nxv2i64(<vscale x 2 x i64>
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; ZVBB-NEXT: vid.v v12
-; ZVBB-NEXT: vand.vi v13, v12, 1
-; ZVBB-NEXT: vmsne.vi v0, v13, 0
; ZVBB-NEXT: vsrl.vi v16, v12, 1
+; ZVBB-NEXT: vand.vi v12, v12, 1
+; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t
; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16
@@ -288,32 +288,44 @@ define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vand.vi v26, v24, 1
-; CHECK-NEXT: vmsne.vi v10, v26, 0
-; CHECK-NEXT: vsrl.vi v8, v24, 1
-; CHECK-NEXT: vmv8r.v v24, v0
-; CHECK-NEXT: vmv4r.v v12, v4
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v6, v24, 1
+; CHECK-NEXT: vand.vi v8, v24, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vadd.vx v6, v6, a0, v0.t
; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
+; CHECK-NEXT: vrgatherei16.vv v8, v24, v6
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v6
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -323,32 +335,44 @@ define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64
; ZVBB-NEXT: addi sp, sp, -16
; ZVBB-NEXT: .cfi_def_cfa_offset 16
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: slli a0, a0, 4
; ZVBB-NEXT: sub sp, sp, a0
-; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVBB-NEXT: vmv8r.v v0, v8
+; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: add a0, sp, a0
+; ZVBB-NEXT: addi a0, a0, 16
+; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: srli a0, a0, 1
; ZVBB-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; ZVBB-NEXT: vid.v v24
-; ZVBB-NEXT: vand.vi v26, v24, 1
-; ZVBB-NEXT: vmsne.vi v10, v26, 0
-; ZVBB-NEXT: vsrl.vi v8, v24, 1
-; ZVBB-NEXT: vmv8r.v v24, v0
-; ZVBB-NEXT: vmv4r.v v12, v4
-; ZVBB-NEXT: vmv1r.v v0, v10
-; ZVBB-NEXT: vadd.vx v8, v8, a0, v0.t
+; ZVBB-NEXT: vsrl.vi v6, v24, 1
+; ZVBB-NEXT: vand.vi v8, v24, 1
+; ZVBB-NEXT: vmsne.vi v0, v8, 0
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: slli a1, a1, 3
+; ZVBB-NEXT: add a1, sp, a1
+; ZVBB-NEXT: addi a1, a1, 16
+; ZVBB-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; ZVBB-NEXT: vadd.vx v6, v6, a0, v0.t
; ZVBB-NEXT: vmv4r.v v28, v16
; ZVBB-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; ZVBB-NEXT: vrgatherei16.vv v0, v24, v8
+; ZVBB-NEXT: vrgatherei16.vv v8, v24, v6
; ZVBB-NEXT: addi a0, sp, 16
-; ZVBB-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: add a0, sp, a0
+; ZVBB-NEXT: addi a0, a0, 16
+; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVBB-NEXT: vmv4r.v v16, v12
-; ZVBB-NEXT: vrgatherei16.vv v24, v16, v8
+; ZVBB-NEXT: vrgatherei16.vv v24, v16, v6
+; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVBB-NEXT: vmv.v.v v16, v24
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: slli a0, a0, 4
; ZVBB-NEXT: add sp, sp, a0
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: ret
@@ -492,9 +516,9 @@ define <vscale x 4 x double> @vector_interleave_nxv4f64_nxv2f64(<vscale x 2 x do
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vand.vi v13, v12, 1
-; CHECK-NEXT: vmsne.vi v0, v13, 0
; CHECK-NEXT: vsrl.vi v16, v12, 1
+; CHECK-NEXT: vand.vi v12, v12, 1
+; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
@@ -507,9 +531,9 @@ define <vscale x 4 x double> @vector_interleave_nxv4f64_nxv2f64(<vscale x 2 x do
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; ZVBB-NEXT: vid.v v12
-; ZVBB-NEXT: vand.vi v13, v12, 1
-; ZVBB-NEXT: vmsne.vi v0, v13, 0
; ZVBB-NEXT: vsrl.vi v16, v12, 1
+; ZVBB-NEXT: vand.vi v12, v12, 1
+; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t
; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16
@@ -588,32 +612,44 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vand.vi v26, v24, 1
-; CHECK-NEXT: vmsne.vi v10, v26, 0
-; CHECK-NEXT: vsrl.vi v8, v24, 1
-; CHECK-NEXT: vmv8r.v v24, v0
-; CHECK-NEXT: vmv4r.v v12, v4
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: vsrl.vi v6, v24, 1
+; CHECK-NEXT: vand.vi v8, v24, 1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vadd.vx v6, v6, a0, v0.t
; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
+; CHECK-NEXT: vrgatherei16.vv v8, v24, v6
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v6
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -623,32 +659,44 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
; ZVBB-NEXT: addi sp, sp, -16
; ZVBB-NEXT: .cfi_def_cfa_offset 16
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: slli a0, a0, 4
; ZVBB-NEXT: sub sp, sp, a0
-; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVBB-NEXT: vmv8r.v v0, v8
+; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: add a0, sp, a0
+; ZVBB-NEXT: addi a0, a0, 16
+; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: srli a0, a0, 1
; ZVBB-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; ZVBB-NEXT: vid.v v24
-; ZVBB-NEXT: vand.vi v26, v24, 1
-; ZVBB-NEXT: vmsne.vi v10, v26, 0
-; ZVBB-NEXT: vsrl.vi v8, v24, 1
-; ZVBB-NEXT: vmv8r.v v24, v0
-; ZVBB-NEXT: vmv4r.v v12, v4
-; ZVBB-NEXT: vmv1r.v v0, v10
-; ZVBB-NEXT: vadd.vx v8, v8, a0, v0.t
+; ZVBB-NEXT: vsrl.vi v6, v24, 1
+; ZVBB-NEXT: vand.vi v8, v24, 1
+; ZVBB-NEXT: vmsne.vi v0, v8, 0
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: slli a1, a1, 3
+; ZVBB-NEXT: add a1, sp, a1
+; ZVBB-NEXT: addi a1, a1, 16
+; ZVBB-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; ZVBB-NEXT: vadd.vx v6, v6, a0, v0.t
; ZVBB-NEXT: vmv4r.v v28, v16
; ZVBB-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; ZVBB-NEXT: vrgatherei16.vv v0, v24, v8
+; ZVBB-NEXT: vrgatherei16.vv v8, v24, v6
; ZVBB-NEXT: addi a0, sp, 16
-; ZVBB-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: add a0, sp, a0
+; ZVBB-NEXT: addi a0, a0, 16
+; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVBB-NEXT: vmv4r.v v16, v12
-; ZVBB-NEXT: vrgatherei16.vv v24, v16, v8
+; ZVBB-NEXT: vrgatherei16.vv v24, v16, v6
+; ZVBB-NEXT: addi a0, sp, 16
; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVBB-NEXT: vmv.v.v v16, v24
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 3
+; ZVBB-NEXT: slli a0, a0, 4
; ZVBB-NEXT: add sp, sp, a0
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
index ffeb399291e1f9..e7184921d87a08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfeq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v10
+; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16(
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfeq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v12
+; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16(
@@ -294,10 +289,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfeq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v10
+; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32(
@@ -450,12 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfeq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v12
+; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32(
@@ -502,10 +493,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfeq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfeq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v10
+; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64(
@@ -606,12 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfeq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v12
+; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
index 993b50a1c81ce8..a6dad9eaa4f358 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v9, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v9, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v9, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v10, v8
+; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16(
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v12, v8
+; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16(
@@ -294,10 +289,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v9, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v9, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v10, v8
+; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32(
@@ -450,12 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v12, v8
+; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32(
@@ -502,10 +493,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v9, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v10, v8
+; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64(
@@ -606,12 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v12, v8
+; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
index 427f0eb28e7df5..f643a4036381c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v9, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v9, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v9, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v10, v8
+; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16(
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v12, v8
+; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16(
@@ -294,10 +289,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v9, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v9, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v10, v8
+; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32(
@@ -450,12 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v12, v8
+; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32(
@@ -502,10 +493,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v9, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v10, v8
+; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64(
@@ -606,12 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v12, v8
+; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
index e5327632fc04f6..6c52364c1fbd56 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16(
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16(
@@ -294,10 +289,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32(
@@ -450,12 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32(
@@ -502,10 +493,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfle.vv v0, v8, v9
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v10
+; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64(
@@ -606,12 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfle.vv v0, v8, v12
+; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
index 64f257e355ceae..37a9c6b081a1df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16(
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16(
@@ -294,10 +289,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32(
@@ -450,12 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32(
@@ -502,10 +493,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmflt.vv v0, v8, v9
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v10
+; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64(
@@ -606,12 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmflt.vv v0, v8, v12
+; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
index 6f6a2a5e8783c6..5defce42091e55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -190,12 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16(
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16(
@@ -294,10 +289,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -346,10 +340,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -398,12 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32(
@@ -450,12 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32(
@@ -502,10 +493,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfne.vv v0, v8, v9
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -554,12 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v10
+; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64(
@@ -606,12 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmfne.vv v0, v8, v12
+; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
index da1c751b566304..cc6c1f585bb7d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmseq.vv v0, v8, v9
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v10
+; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmseq.vv v0, v8, v12
+; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index 502fb9b24148f7..c8f9b60a3f2da6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v9, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v10, v8
+; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v12, v8
+; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index 9410a99d81423f..b6c6d9e90f6109 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v9, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v10, v8
+; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v12, v8
+; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
index b7a676e7f2dd37..dfd7096a65ebb9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsgt.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v9, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v10, v8
+; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v12, v8
+; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
index 88a632de067a68..8826be03bbebb8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v9, v8
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v9, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v10, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v10, v8
+; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v12, v8
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v12, v8
+; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
index 2248ba03adfe79..5d5a28edbfe151 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsle.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsle.vv v0, v8, v9
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsle.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
index 57bae83b25e0e5..c58ac2d0718314 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsleu.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v10
+; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsleu.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsleu.vv v0, v8, v12
+; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
index 6783f7feb624c5..6c6e580b043d1a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmslt.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmslt.vv v0, v8, v9
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v10
+; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmslt.vv v0, v8, v12
+; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
index b082b735a02072..76f3e449ab58f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsltu.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsltu.vv v0, v8, v9
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v10
+; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsltu.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsltu.vv v0, v8, v12
+; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
index bb4575e5d72cbe..161c1bc4314fcb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
@@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v10
+; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i8(
@@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v12
+; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsne.nxv32i8(
@@ -346,10 +340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -502,12 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v10
+; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i16(
@@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v12
+; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i16(
@@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v10
+; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32(
@@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v12
+; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i32(
@@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmsne.vv v0, v8, v9
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v14, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v10
+; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v14
-; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsne.nxv2i64(
@@ -918,12 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v20, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsne.vv v0, v8, v12
+; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v20
-; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index e8620c848f8d3d..39f517a100f527 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -o - -mtriple=riscv64 -mattr=v \
-# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s
+# RUN: -run-pass=riscv-insert-vsetvli,riscv-coalesce-vsetvli | FileCheck %s
--- |
source_filename = "vsetvli-insert.ll"
@@ -166,7 +166,7 @@ body: |
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8
%2:gprnox0 = COPY $x11
@@ -208,7 +208,7 @@ body: |
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 %pt, [[COPY1]], $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: %dead:vr = IMPLICIT_DEF
- ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, killed [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY %3
; CHECK-NEXT: PseudoRET implicit $v8
%1:gprnox0 = COPY $x11
@@ -282,8 +282,8 @@ body: |
; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
; CHECK-NEXT: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt2, [[COPY]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.y)
; CHECK-NEXT: %pt3:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt3, killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x)
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt3, [[PseudoVLE64_V_M1_]], [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoVSE64_V_M1 [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x)
; CHECK-NEXT: PseudoRET
%1:gpr = COPY $x11
%0:gpr = COPY $x10
@@ -328,8 +328,8 @@ body: |
; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S killed [[PseudoVREDSUM_VS_M1_E8_]], 6 /* e64 */, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], [[PseudoVLE64_V_M1_]], [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S [[PseudoVREDSUM_VS_M1_E8_]], 6 /* e64 */, implicit $vtype
; CHECK-NEXT: $x10 = COPY [[PseudoVMV_X_S]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gpr = COPY $x10
@@ -418,7 +418,7 @@ body: |
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8
%2:gprnox0 = COPY $x11
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index d0b76e7e4535b8..fcd852f1210df5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -27,7 +27,7 @@ body: |
; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 3
; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
- ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 152 /* e64, m1, tu, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: $x12 = PseudoReadVLENB