[llvm] fc13353 - Revert "[RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (#88295)"
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 24 08:27:59 PDT 2024
Author: Luke Lau
Date: 2024-04-24T23:27:01+08:00
New Revision: fc13353e10b443101b7304c5f90d70a06e20e589
URL: https://github.com/llvm/llvm-project/commit/fc13353e10b443101b7304c5f90d70a06e20e589
DIFF: https://github.com/llvm/llvm-project/commit/fc13353e10b443101b7304c5f90d70a06e20e589.diff
LOG: Revert "[RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc (#88295)"
Seems to cause an address sanitizer failure related to live intervals on one
of the buildbots.
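
The reverted patch ran vsetvli coalescing after vector register allocation and therefore had to keep LiveIntervals up to date by hand whenever it moved a vsetvli's register definition (see the hunks removed from RISCVInsertVSETVLI.cpp below). The standalone sketch below is only a toy model of that bookkeeping, not the LLVM implementation and not a confirmed diagnosis of the sanitizer report; the LiveRange struct and moveDefEarlier helper are invented for illustration.

// Toy model of the live-range maintenance the reverted pass performed when it
// moved a vsetvli's register definition to an earlier instruction. Slot
// indices are plain integers here; LLVM uses SlotIndexes/LiveIntervals.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

struct LiveRange {
  int DefSlot; // slot of the defining instruction
  int EndSlot; // last slot at which the value is live
};

// The def moves from its old slot up to NewDefSlot. The range must first be
// extended back to the new definition point and then shrunk to the remaining
// uses; skipping either step leaves a stale interval behind, which is the
// kind of inconsistency a sanitizer or the MIR verifier can trip over.
static void moveDefEarlier(LiveRange &LR, int NewDefSlot,
                           const std::vector<int> &RemainingUses) {
  assert(NewDefSlot < LR.DefSlot && "def must move to an earlier slot");
  LR.DefSlot = NewDefSlot;      // extend backwards to the new def
  if (RemainingUses.empty()) {  // dead def: shrink to the def slot only
    LR.EndSlot = NewDefSlot;
    return;
  }
  LR.EndSlot = *std::max_element(RemainingUses.begin(), RemainingUses.end());
}

int main() {
  LiveRange VL{/*DefSlot=*/40, /*EndSlot=*/60};
  moveDefEarlier(VL, /*NewDefSlot=*/20, /*RemainingUses=*/{60});
  std::printf("live range now [%d, %d]\n", VL.DefSlot, VL.EndSlot);
  return 0;
}

In the removed code these two steps correspond to DefLI.addSegment(...) plus updating DefVNI->def, followed by LIS->shrinkToUses(&DefLI).
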
Added:
Modified:
llvm/lib/Target/RISCV/RISCV.h
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/O0-pipeline.ll
llvm/test/CodeGen/RISCV/O3-pipeline.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
llvm/test/CodeGen/RISCV/rvv/vmfge.ll
llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
llvm/test/CodeGen/RISCV/rvv/vmfle.ll
llvm/test/CodeGen/RISCV/rvv/vmflt.ll
llvm/test/CodeGen/RISCV/rvv/vmfne.ll
llvm/test/CodeGen/RISCV/rvv/vmseq.ll
llvm/test/CodeGen/RISCV/rvv/vmsge.ll
llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
llvm/test/CodeGen/RISCV/rvv/vmsle.ll
llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
llvm/test/CodeGen/RISCV/rvv/vmslt.ll
llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
llvm/test/CodeGen/RISCV/rvv/vmsne.ll
llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index d405395dcf9ec4..7af543f018ccbd 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -61,9 +61,6 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
-FunctionPass *createRISCVCoalesceVSETVLIPass();
-void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);
-
FunctionPass *createRISCVPostRAExpandPseudoPass();
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertReadWriteCSRPass();
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index a57d6afcb25586..15efcf1dd1f7ec 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -27,19 +27,16 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;
#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
-#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
-STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
+STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -193,11 +190,6 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
if (UseMO.getReg() == RISCV::NoRegister)
return true;
- if (UseMO.isUndef())
- return true;
- if (UseMO.getReg().isPhysical())
- return false;
-
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
if (UseMI->isImplicitDef())
return true;
@@ -788,40 +780,11 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
VSETVLIInfo &Info) const;
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
void emitVSETVLIs(MachineBasicBlock &MBB);
+ void doLocalPostpass(MachineBasicBlock &MBB);
void doPRE(MachineBasicBlock &MBB);
void insertReadVL(MachineBasicBlock &MBB);
};
-class RISCVCoalesceVSETVLI : public MachineFunctionPass {
-public:
- static char ID;
- const RISCVSubtarget *ST;
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
- LiveIntervals *LIS;
-
- RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
-
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveDebugVariables>();
- AU.addPreserved<LiveStacks>();
-
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
-
-private:
- bool coalesceVSETVLIs(MachineBasicBlock &MBB);
-};
-
} // end anonymous namespace
char RISCVInsertVSETVLI::ID = 0;
@@ -829,11 +792,6 @@ char RISCVInsertVSETVLI::ID = 0;
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
false, false)
-char RISCVCoalesceVSETVLI::ID = 0;
-
-INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
- RISCV_COALESCE_VSETVLI_NAME, false, false)
-
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1557,12 +1515,12 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
auto &AVL = MI.getOperand(1);
auto &PrevAVL = PrevMI.getOperand(1);
+ assert(MRI.isSSA());
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
// For now just check that PrevMI uses the same virtual register.
if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
- (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
- PrevAVL.getReg() != AVL.getReg()))
+ (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()))
return false;
}
@@ -1572,7 +1530,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
return areCompatibleVTYPEs(PriorVType, VType, Used);
}
-bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
+void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
@@ -1605,28 +1563,8 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
if (!isVLPreservingConfig(*NextMI)) {
- Register DefReg = NextMI->getOperand(0).getReg();
-
- MI.getOperand(0).setReg(DefReg);
+ MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
MI.getOperand(0).setIsDead(false);
-
- // The def of DefReg moved to MI, so extend the LiveInterval up to
- // it.
- if (DefReg.isVirtual()) {
- LiveInterval &DefLI = LIS->getInterval(DefReg);
- SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
- VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
- LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
- DefLI.addSegment(S);
- DefVNI->def = MISlot;
- // Mark DefLI as spillable if it was previously unspillable
- DefLI.setWeight(0);
-
- // DefReg may have had no uses, in which case we need to shrink
- // the LiveInterval up to MI.
- LIS->shrinkToUses(&DefLI);
- }
-
Register OldVLReg;
if (MI.getOperand(1).isReg())
OldVLReg = MI.getOperand(1).getReg();
@@ -1634,20 +1572,11 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
else
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
-
- // Clear NextMI's AVL early so we're not counting it as a use.
- if (NextMI->getOperand(1).isReg())
- NextMI->getOperand(1).setReg(RISCV::NoRegister);
-
if (OldVLReg) {
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg))
VLOpDef->eraseFromParent();
-
- // NextMI no longer uses OldVLReg so shrink its LiveInterval.
- if (OldVLReg.isVirtual())
- LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
}
MI.setDesc(NextMI->getDesc());
}
@@ -1660,13 +1589,9 @@ bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
Used = getDemanded(MI, MRI, ST);
}
- NumCoalescedVSETVL += ToDelete.size();
- for (auto *MI : ToDelete) {
- LIS->RemoveMachineInstrFromMaps(*MI);
+ NumRemovedVSETVL += ToDelete.size();
+ for (auto *MI : ToDelete)
MI->eraseFromParent();
- }
-
- return !ToDelete.empty();
}
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
@@ -1741,6 +1666,15 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
emitVSETVLIs(MBB);
+ // Now that all vsetvlis are explicit, go through and do block local
+ // DSE and peephole based demanded fields based transforms. Note that
+ // this *must* be done outside the main dataflow so long as we allow
+ // any cross block analysis within the dataflow. We can't have both
+ // demanded fields based mutation and non-local analysis in the
+ // dataflow at the same time without introducing inconsistencies.
+ for (MachineBasicBlock &MBB : MF)
+ doLocalPostpass(MBB);
+
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
for (MachineBasicBlock &MBB : MF)
@@ -1754,29 +1688,3 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
return new RISCVInsertVSETVLI();
}
-
-// Now that all vsetvlis are explicit, go through and do block local
-// DSE and peephole based demanded fields based transforms. Note that
-// this *must* be done outside the main dataflow so long as we allow
-// any cross block analysis within the dataflow. We can't have both
-// demanded fields based mutation and non-local analysis in the
-// dataflow at the same time without introducing inconsistencies.
-bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
- // Skip if the vector extension is not enabled.
- ST = &MF.getSubtarget<RISCVSubtarget>();
- if (!ST->hasVInstructions())
- return false;
- TII = ST->getInstrInfo();
- MRI = &MF.getRegInfo();
- LIS = &getAnalysis<LiveIntervals>();
-
- bool Changed = false;
- for (MachineBasicBlock &MBB : MF)
- Changed |= coalesceVSETVLIs(MBB);
-
- return Changed;
-}
-
-FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
- return new RISCVCoalesceVSETVLI();
-}
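
With the separate pass gone, the block-local cleanup above lives again in RISCVInsertVSETVLI::doLocalPostpass. As a rough illustration of the DSE half of that peephole -- a simplified model only; the real pass reasons about demanded fields via getDemanded/canMutatePriorConfig rather than this all-or-nothing check, and the Inst/coalesceBlock names are invented -- the sketch below walks a block backwards and erases a vsetvli whose configuration is overwritten by a later vsetvli before any instruction reads VL or VTYPE.

// Toy block-local coalescing: delete a vsetvli whose state is fully
// overwritten by the next vsetvli before anything reads VL/VTYPE.
#include <cstdio>
#include <vector>

struct Inst {
  bool IsVSetVLI;    // true for vsetvli/vsetivli
  bool ReadsVLVTYPE; // true for vector instructions that use VL/VTYPE
  int AVL, VType;    // configuration written by a vsetvli
};

static void coalesceBlock(std::vector<Inst> &MBB) {
  const Inst *Next = nullptr;     // most recently seen (later) vsetvli
  std::vector<size_t> ToDelete;
  for (size_t I = MBB.size(); I-- > 0;) {
    Inst &MI = MBB[I];
    if (MI.ReadsVLVTYPE)
      Next = nullptr;             // state is demanded; keep prior vsetvli
    if (!MI.IsVSetVLI)
      continue;
    if (Next)                     // MI's effect is never observed
      ToDelete.push_back(I);
    Next = &MI;
  }
  for (size_t I : ToDelete)
    MBB.erase(MBB.begin() + I);   // indices were collected in decreasing order
}

int main() {
  // Two back-to-back vsetvlis with no vector op in between: the first is dead.
  std::vector<Inst> MBB = {{true, false, 4, 0x10},
                           {true, false, 8, 0x18},
                           {false, true, 0, 0}};
  coalesceBlock(MBB);
  std::printf("%zu instructions remain\n", MBB.size());
  return 0;
}

Running this cleanup after the main dataflow matches the comment restored in runOnMachineFunction above: demanded-fields mutation and cross-block analysis cannot safely coexist inside the same dataflow.
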
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 0876f46728a10c..34ddd635231087 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -116,7 +116,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVFoldMasksPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
- initializeRISCVCoalesceVSETVLIPass(*PR);
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
@@ -389,14 +388,12 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
addPass(createRVVRegAllocPass(false));
- addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteFast();
}
bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
addPass(createRVVRegAllocPass(true));
addPass(createVirtRegRewriter(false));
- addPass(createRISCVCoalesceVSETVLIPass());
return TargetPassConfig::addRegAssignAndRewriteOptimized();
}
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index 56bd4bd0c08f09..faf37545e1a117 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -47,10 +47,6 @@
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Slot index numbering
-; CHECK-NEXT: Live Interval Analysis
-; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
; CHECK-NEXT: Fixup Statepoint Caller Saved
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 4121d111091117..90472f246918f3 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -143,7 +143,6 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Greedy Register Allocator
; CHECK-NEXT: Virtual Register Rewriter
-; CHECK-NEXT: RISC-V Coalesce VSETVLI pass
; CHECK-NEXT: Virtual Register Map
; CHECK-NEXT: Live Register Matrix
; CHECK-NEXT: Greedy Register Allocator
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 9e83efd3519539..8e214e40547832 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-NEXT: vfmv.v.f v8, fa4
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index ed152e64a91ef4..6bfd0ac932672f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
+; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
@@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
-; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
+; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index a8e4af2d7368e8..85b849045e8cee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa5, fa5, fa4
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
+; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
@@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa5, fa5, fa4
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
+; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
@@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vmv.v.i v0, 15
+; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vse8.v v9, (a1)
; RV32-NEXT: addi sp, s0, -128
@@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vmv.v.i v0, 15
+; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vse8.v v9, (a1)
; RV64-NEXT: addi sp, s0, -128
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 40ff8b50d99d8d..6da83644413bc2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vmv.v.i v0, 10
+; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
; RV32-V512-NEXT: ret
@@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
-; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vmv.v.i v0, 10
+; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT: vmv.v.v v8, v10
; RV64-V512-NEXT: ret
@@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; V128-NEXT: vid.v v8
; V128-NEXT: vsrl.vi v8, v8, 1
-; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vmv.v.i v0, 10
+; V128-NEXT: vadd.vi v8, v8, 1
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
; V128-NEXT: vmv.v.v v8, v10
; V128-NEXT: ret
@@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
; V512-NEXT: vid.v v8
; V512-NEXT: vsrl.vi v8, v8, 1
-; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vmv.v.i v0, 10
+; V512-NEXT: vadd.vi v8, v8, 1
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 58af6ac246d161..0e8d9cf0306690 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -89,8 +89,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) {
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v11, (a0)
-; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vmv.v.i v0, 8
+; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -162,16 +162,16 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v16, 2
+; RV32-NEXT: li a0, 5
+; RV32-NEXT: vslide1down.vx v20, v16, a0
; RV32-NEXT: lui a0, %hi(.LCPI11_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0)
-; RV32-NEXT: vle16.v v20, (a0)
-; RV32-NEXT: li a0, 5
-; RV32-NEXT: vslide1down.vx v21, v16, a0
+; RV32-NEXT: vle16.v v21, (a0)
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v16, v8, v20
; RV32-NEXT: li a0, 164
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t
+; RV32-NEXT: vrgatherei16.vv v16, v8, v21
+; RV32-NEXT: vrgatherei16.vv v16, v12, v20, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
@@ -210,13 +210,13 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: vmv.v.i v20, -1
-; RV32-NEXT: vrgatherei16.vv v12, v20, v16
; RV32-NEXT: lui a0, %hi(.LCPI12_1)
; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1)
-; RV32-NEXT: vle16.v v16, (a0)
+; RV32-NEXT: vle16.v v17, (a0)
; RV32-NEXT: li a0, 113
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
+; RV32-NEXT: vrgatherei16.vv v12, v20, v16
+; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -368,9 +368,9 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -387,9 +387,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT: vmv.s.x v11, a0
; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -422,9 +422,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v11, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: li a0, 66
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -441,9 +441,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v11, v10, 2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: li a0, 70
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vrgather.vi v10, v8, 2
; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -464,9 +464,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v12, a0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: li a0, 98
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vrgather.vv v10, v8, v12
; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -681,9 +681,9 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w
; CHECK-LABEL: merge_non_contiguous_slideup_slidedown:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 8, i32 4, i32 10, i32 6, i32 12, i32 13, i32 14>
@@ -695,12 +695,12 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: unmergable:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0)
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: li a0, 234
; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
%res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 2, i32 9, i32 4, i32 11, i32 6, i32 13, i32 8, i32 15>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 99364264de8293..f98cb343a2ab42 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -159,17 +159,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 54
+; RV32-NEXT: li a3, 56
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x36, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 54 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; RV32-NEXT: addi a3, a1, 256
; RV32-NEXT: li a2, 32
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v16, (a3)
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: li a4, 21
-; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
@@ -177,30 +176,31 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vslideup.vi v8, v16, 4
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a5, a4, 3
-; RV32-NEXT: add a4, a5, a4
+; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 12
-; RV32-NEXT: vmv.s.x v0, a4
-; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: add a4, sp, a4
-; RV32-NEXT: addi a4, a4, 16
-; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vmv.s.x v3, a4
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT: vslidedown.vi v16, v16, 16
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 37
+; RV32-NEXT: li a5, 24
; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 2
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
+; RV32-NEXT: vs1r.v v3, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: slli a5, a4, 4
-; RV32-NEXT: add a4, a5, a4
+; RV32-NEXT: li a5, 20
+; RV32-NEXT: mul a4, a4, a5
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
@@ -209,82 +209,71 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
; RV32-NEXT: vle16.v v8, (a4)
; RV32-NEXT: csrr a4, vlenb
-; RV32-NEXT: li a5, 13
-; RV32-NEXT: mul a4, a4, a5
+; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT: vle32.v v24, (a1)
+; RV32-NEXT: lui a4, %hi(.LCPI6_1)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI6_1)
+; RV32-NEXT: lui a5, 1
+; RV32-NEXT: vle16.v v8, (a4)
+; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT: vle32.v v16, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a4, 45
+; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a1, a1, a4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: lui a1, %hi(.LCPI6_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI6_1)
-; RV32-NEXT: lui a4, 1
-; RV32-NEXT: addi a4, a4, -64
-; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a5, a1, 2
-; RV32-NEXT: add a1, a5, a1
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vle32.v v16, (a3)
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vle32.v v24, (a3)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 29
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vmv.s.x v2, a4
+; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, a5, -64
+; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 13
+; RV32-NEXT: li a3, 12
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v24, v4
-; RV32-NEXT: vmv1r.v v0, v2
+; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a3, a1, 2
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
+; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v16, v4
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a3, a1, 4
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: li a3, 20
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmv.v.v v12, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a3, a1, 4
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: li a3, 20
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 21
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv4r.v v16, v8
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vslideup.vi v8, v16, 2
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 37
+; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -294,45 +283,36 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a1, %hi(.LCPI6_2)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 13
-; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: lui a1, %hi(.LCPI6_3)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI6_3)
-; RV32-NEXT: vle16.v v8, (a1)
+; RV32-NEXT: lui a3, %hi(.LCPI6_3)
+; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3)
+; RV32-NEXT: vle16.v v24, (a1)
+; RV32-NEXT: vle16.v v8, (a3)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a3, a1, 2
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 45
+; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v0, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 13
+; RV32-NEXT: li a3, 12
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v24, v4
-; RV32-NEXT: vmv1r.v v0, v2
+; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 29
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a3, a1, 2
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
@@ -340,8 +320,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v20, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a3, a1, 2
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: li a3, 12
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
@@ -350,178 +330,171 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 21
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v12, v24, v8
-; RV32-NEXT: vmv1r.v v0, v3
-; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 13
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vmv1r.v v0, v3
+; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t
+; RV32-NEXT: vmv.v.v v4, v12
; RV32-NEXT: lui a1, %hi(.LCPI6_5)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT: lui a3, %hi(.LCPI6_6)
+; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6)
; RV32-NEXT: vle16.v v24, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI6_6)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI6_6)
-; RV32-NEXT: li a3, 960
-; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: vmv.s.x v0, a3
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v8, (a3)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: li a1, 960
+; RV32-NEXT: vmv.s.x v2, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 45
+; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v16, v24
+; RV32-NEXT: vmv1r.v v0, v2
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 29
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t
-; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 13
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vmv.v.v v12, v8
+; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
+; RV32-NEXT: vmv.v.v v4, v8
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 13
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_7)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7)
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vle16.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 21
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v4, v16, v8
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v28, v24, v8
; RV32-NEXT: vmv1r.v v0, v3
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 37
+; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v4, v8, 4, v0.t
+; RV32-NEXT: vslideup.vi v28, v8, 4, v0.t
+; RV32-NEXT: vmv.v.v v4, v28
; RV32-NEXT: lui a1, %hi(.LCPI6_8)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT: vle16.v v0, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI6_9)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI6_9)
-; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a3, %hi(.LCPI6_9)
+; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9)
+; RV32-NEXT: vle16.v v28, (a1)
+; RV32-NEXT: vle16.v v24, (a3)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 45
+; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v0
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v16, v28
+; RV32-NEXT: vmv1r.v v0, v2
; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 48
+; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v4, v8
; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 21
-; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vslideup.vi v12, v8, 6
; RV32-NEXT: lui a1, %hi(.LCPI6_10)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10)
-; RV32-NEXT: vle16.v v8, (a1)
+; RV32-NEXT: vle16.v v4, (a1)
; RV32-NEXT: lui a1, 15
-; RV32-NEXT: vmv.s.x v24, a1
-; RV32-NEXT: vmv1r.v v0, v24
+; RV32-NEXT: vmv.s.x v6, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 37
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vslideup.vi v28, v24, 6
+; RV32-NEXT: vmv1r.v v0, v6
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
-; RV32-NEXT: vmv.v.v v28, v12
+; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v28, v8, v4, v0.t
; RV32-NEXT: lui a1, %hi(.LCPI6_11)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT: lui a3, %hi(.LCPI6_12)
+; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12)
; RV32-NEXT: vle16.v v0, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI6_12)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI6_12)
-; RV32-NEXT: li a3, 1008
-; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: vmv.s.x v25, a3
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vle16.v v24, (a3)
+; RV32-NEXT: li a1, 1008
+; RV32-NEXT: vmv.s.x v7, a1
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 45
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v16, v0
-; RV32-NEXT: vmv1r.v v0, v25
+; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 29
+; RV32-NEXT: li a3, 48
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t
+; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v8
-; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 21
-; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: add a1, sp, a1
-; RV32-NEXT: addi a1, a1, 16
-; RV32-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, %hi(.LCPI6_13)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13)
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: vmv1r.v v0, v24
+; RV32-NEXT: vmv1r.v v0, v6
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a3, a1, 3
-; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 37
+; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
@@ -530,70 +503,79 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: lui a1, %hi(.LCPI6_14)
; RV32-NEXT: addi a1, a1, %lo(.LCPI6_14)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT: lui a2, %hi(.LCPI6_15)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15)
; RV32-NEXT: vle16.v v16, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI6_15)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI6_15)
-; RV32-NEXT: vle16.v v28, (a1)
+; RV32-NEXT: vle16.v v8, (a2)
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 45
+; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vrgatherei16.vv v8, v0, v16
-; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 29
+; RV32-NEXT: li a2, 48
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v24, v8
; RV32-NEXT: addi a1, a0, 320
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vse32.v v24, (a1)
; RV32-NEXT: addi a1, a0, 256
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 21
-; RV32-NEXT: mul a2, a2, a3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
-; RV32-NEXT: vse32.v v8, (a1)
+; RV32-NEXT: vse32.v v28, (a1)
; RV32-NEXT: addi a1, a0, 192
; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 2
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: addi a1, a0, 128
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 13
-; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: addi a1, a0, 64
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a3, a2, 2
-; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: li a3, 12
+; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 16
; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a2, a1, 4
-; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: li a2, 20
+; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a1, 54
+; RV32-NEXT: li a1, 56
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
@@ -604,324 +586,320 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 56
+; RV64-NEXT: li a3, 52
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x34, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 52 * vlenb
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: addi a2, a1, 256
; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 5
+; RV64-NEXT: li a3, 27
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT: addi a2, a1, 128
; RV64-NEXT: vle64.v v8, (a2)
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 40
+; RV64-NEXT: li a3, 35
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vle64.v v24, (a1)
+; RV64-NEXT: vle64.v v8, (a1)
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 43
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vrgather.vi v8, v16, 4
; RV64-NEXT: li a1, 128
-; RV64-NEXT: vmv.s.x v4, a1
+; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; RV64-NEXT: vslidedown.vi v16, v16, 8
+; RV64-NEXT: vslidedown.vi v24, v16, 8
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vmv1r.v v28, v0
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 24
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64-NEXT: vmv1r.v v0, v4
+; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 20
+; RV64-NEXT: li a2, 19
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t
-; RV64-NEXT: vmv.v.v v20, v8
+; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vmv.v.v v4, v8
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: li a1, 6
; RV64-NEXT: vid.v v8
-; RV64-NEXT: vmul.vx v6, v8, a1
+; RV64-NEXT: vmul.vx v2, v8, a1
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vrgatherei16.vv v8, v24, v6
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 48
+; RV64-NEXT: li a2, 43
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v2
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: li a1, 56
-; RV64-NEXT: vmv.s.x v5, a1
-; RV64-NEXT: vadd.vi v16, v6, -16
+; RV64-NEXT: vmv.s.x v1, a1
+; RV64-NEXT: vadd.vi v30, v2, -16
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v5
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 40
+; RV64-NEXT: li a2, 35
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v30, v0.t
; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v20, v8
+; RV64-NEXT: vmv.v.v v4, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: sub a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v24, v16, 5
-; RV64-NEXT: vmv1r.v v0, v4
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 24
+; RV64-NEXT: li a2, 27
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t
+; RV64-NEXT: vrgather.vi v4, v16, 5
+; RV64-NEXT: vmv1r.v v0, v28
+; RV64-NEXT: vrgather.vi v4, v24, 3, v0.t
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v28, v6, 1
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs2r.v v2, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vadd.vi v16, v2, 1
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 48
+; RV64-NEXT: li a2, 43
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v28
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v24, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v28, v6, -15
+; RV64-NEXT: vadd.vi v16, v2, -15
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 11
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs2r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vmv1r.v v0, v5
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 40
+; RV64-NEXT: li a2, 35
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
-; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v24, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 12
+; RV64-NEXT: li a2, 11
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vmv2r.v v26, v6
-; RV64-NEXT: vadd.vi v24, v6, 2
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v2, v0.t
+; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma
+; RV64-NEXT: vmv.v.v v4, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 48
+; RV64-NEXT: li a2, 11
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v0, v24
+; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vadd.vi v6, v2, 2
+; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64-NEXT: vrgatherei16.vv v8, v24, v6
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: li a1, 24
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 1
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vadd.vi v24, v26, -14
-; RV64-NEXT: vmv2r.v v6, v26
+; RV64-NEXT: vmv.s.x v7, a1
+; RV64-NEXT: vadd.vi v26, v2, -14
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
+; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.i v12, 6
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
+; RV64-NEXT: li a2, 27
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v20, v24, v12
+; RV64-NEXT: vrgatherei16.vv v16, v24, v12
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 20
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vl1r.v v6, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv1r.v v0, v6
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 24
+; RV64-NEXT: li a2, 19
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v20, v24, 4, v0.t
+; RV64-NEXT: vrgather.vi v16, v24, 4, v0.t
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v20, v8
+; RV64-NEXT: vmv.v.v v16, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vmv2r.v v10, v6
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 6
-; RV64-NEXT: mul a1, a1, a2
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs2r.v v6, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vadd.vi v8, v6, 3
+; RV64-NEXT: vadd.vi v28, v2, 3
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 48
+; RV64-NEXT: li a2, 43
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v16, v0, v8
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v28
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v28, v10, -13
+; RV64-NEXT: vadd.vi v28, v2, -13
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vmv1r.v v0, v7
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 1
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 40
+; RV64-NEXT: li a2, 35
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v16, v8, v28, v0.t
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t
; RV64-NEXT: lui a1, 16
; RV64-NEXT: addi a1, a1, 7
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
+; RV64-NEXT: li a2, 27
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vmv4r.v v8, v0
-; RV64-NEXT: vrgatherei16.vv v20, v0, v12
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v16, v24, v12
+; RV64-NEXT: vmv1r.v v0, v6
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 20
+; RV64-NEXT: li a2, 19
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgather.vi v16, v24, 5, v0.t
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v20, v16
+; RV64-NEXT: vmv.v.v v16, v8
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 20
-; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: lui a1, 96
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v12, a1
+; RV64-NEXT: vmv.v.x v8, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: li a1, 192
; RV64-NEXT: vmv.s.x v0, a1
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vrgather.vi v28, v8, 2
-; RV64-NEXT: vrgatherei16.vv v28, v24, v12, v0.t
-; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 6
+; RV64-NEXT: li a2, 27
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vadd.vi v16, v24, 4
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgather.vi v4, v16, 2
+; RV64-NEXT: vrgatherei16.vv v4, v24, v8, v0.t
+; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT: vadd.vi v16, v2, 4
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 48
+; RV64-NEXT: li a2, 43
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v0, v16
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v24, v16
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: li a1, 28
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vadd.vi v26, v24, -12
+; RV64-NEXT: vmv.s.x v1, a1
+; RV64-NEXT: vadd.vi v16, v2, -12
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 40
+; RV64-NEXT: li a2, 35
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma
-; RV64-NEXT: vmv.v.v v28, v8
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 1
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vmv.v.v v4, v8
; RV64-NEXT: lui a1, 112
; RV64-NEXT: addi a1, a1, 1
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmv.v.x v12, a1
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 5
+; RV64-NEXT: li a2, 27
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgather.vi v8, v16, 3
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 24
+; RV64-NEXT: li a2, 19
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vrgatherei16.vv v8, v16, v12, v0.t
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v12, v24, 5
+; RV64-NEXT: vadd.vi v12, v2, 5
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 48
+; RV64-NEXT: li a2, 43
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
-; RV64-NEXT: vrgatherei16.vv v16, v0, v12
+; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vrgatherei16.vv v16, v24, v12
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vadd.vi v12, v24, -11
+; RV64-NEXT: vadd.vi v12, v2, -11
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a2, 40
+; RV64-NEXT: li a2, 35
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
@@ -933,43 +911,40 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 1
-; RV64-NEXT: add a2, sp, a2
-; RV64-NEXT: addi a2, a2, 16
-; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
-; RV64-NEXT: vse64.v v8, (a1)
+; RV64-NEXT: vse64.v v4, (a1)
; RV64-NEXT: addi a1, a0, 192
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 20
-; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: addi a1, a0, 64
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: li a3, 12
+; RV64-NEXT: li a3, 11
; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: sub a1, a2, a1
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: li a1, 56
+; RV64-NEXT: li a1, 52
; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 023d707f07bff7..c295fed2c28c10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -286,8 +286,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -303,8 +303,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -331,8 +331,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a3
; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -348,8 +348,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
@@ -375,8 +375,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
@@ -391,8 +391,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; ZVE32F-NEXT: vand.vi v8, v8, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 7fc442c88d101b..4f7b885d998e5b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -25,10 +25,10 @@ define void @splat_v1i1(ptr %x, i1 %y) {
; CHECK-LABEL: splat_v1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: andi a1, a1, 1
-; CHECK-NEXT: vmv.s.x v9, a1
-; CHECK-NEXT: vmsne.vi v0, v9, 0
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 539a8403c93521..9fbc22221f99bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -12728,8 +12728,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
-; RV32-NEXT: vslide1down.vx v8, v8, a7
; RV32-NEXT: vmv.v.i v0, 15
+; RV32-NEXT: vslide1down.vx v8, v8, a7
; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-NEXT: ret
;
@@ -12803,8 +12803,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64V-NEXT: vmv.v.x v8, a3
; RV64V-NEXT: vslide1down.vx v8, v8, a5
; RV64V-NEXT: vslide1down.vx v8, v8, a6
-; RV64V-NEXT: vslide1down.vx v8, v8, a7
; RV64V-NEXT: vmv.v.i v0, 15
+; RV64V-NEXT: vslide1down.vx v8, v8, a7
; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64V-NEXT: addi sp, s0, -128
; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
@@ -12854,8 +12854,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: vmv.v.x v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -12896,8 +12896,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
@@ -12941,8 +12941,8 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15>
@@ -12986,8 +12986,8 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
@@ -13031,8 +13031,8 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
@@ -13074,8 +13074,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
@@ -13120,8 +13120,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
@@ -13167,8 +13167,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
@@ -13217,8 +13217,8 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
@@ -13264,8 +13264,8 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
@@ -13320,8 +13320,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
@@ -13367,8 +13367,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index 5f456c7824316b..2a0ec47a3de01c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -221,10 +221,10 @@ define i32 @reduce_sum_16xi32_prefix7(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vle32.v v10, (a0)
-; CHECK-NEXT: vslideup.vi v10, v8, 7
-; CHECK-NEXT: vredsum.vs v8, v10, v8
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmv.s.x v10, zero
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -248,9 +248,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) {
; CHECK-LABEL: reduce_sum_16xi32_prefix8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vle32.v v10, (a0)
-; CHECK-NEXT: vredsum.vs v8, v10, v8
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmv.s.x v10, zero
+; CHECK-NEXT: vredsum.vs v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -670,15 +670,15 @@ define i32 @reduce_smax_16xi32_prefix5(ptr %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, a1
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 5
+; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 6
+; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 7
-; CHECK-NEXT: vredmax.vs v8, v10, v10
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: vredmax.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -715,15 +715,15 @@ define i32 @reduce_smin_16xi32_prefix5(ptr %p) {
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, a1
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 5
+; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 6
+; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 7
-; CHECK-NEXT: vredmin.vs v8, v10, v10
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: vredmin.vs v8, v8, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
%v = load <16 x i32>, ptr %p, align 256
@@ -830,9 +830,9 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) {
; CHECK-LABEL: reduce_fadd_16xf32_prefix2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, zero
-; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vfredusum.vs v8, v9, v8
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmv.s.x v9, zero
+; CHECK-NEXT: vfredusum.vs v8, v8, v9
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <16 x float>, ptr %p, align 256
@@ -847,15 +847,15 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, 524288
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v8, a1
-; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 5
+; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 6
+; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 7
-; CHECK-NEXT: vfredusum.vs v8, v10, v8
+; CHECK-NEXT: vslideup.vi v8, v10, 7
+; CHECK-NEXT: vfredusum.vs v8, v8, v10
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
%v = load <16 x float>, ptr %p, align 256
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index ab7da9e0faf2b9..032d32109933f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -49,8 +49,8 @@ define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) {
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
@@ -174,8 +174,8 @@ define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) {
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
@@ -492,8 +492,8 @@ define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) {
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 922692ed88c9f2..9a5e86d61c265e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -107,14 +107,14 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs
; CHECK-NEXT: srli a2, a1, 1
; CHECK-NEXT: vsetvli a3, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vsrl.vi v26, v24, 1
-; CHECK-NEXT: vand.vi v24, v24, 1
-; CHECK-NEXT: vmsne.vi v28, v24, 0
+; CHECK-NEXT: vand.vi v26, v24, 1
+; CHECK-NEXT: vmsne.vi v28, v26, 0
+; CHECK-NEXT: vsrl.vi v24, v24, 1
; CHECK-NEXT: vmv1r.v v0, v28
-; CHECK-NEXT: vadd.vx v26, v26, a2, v0.t
+; CHECK-NEXT: vadd.vx v24, v24, a2, v0.t
; CHECK-NEXT: vmv4r.v v12, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v0, v8, v26
+; CHECK-NEXT: vrgatherei16.vv v0, v8, v24
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
@@ -123,7 +123,7 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v8, v16, v26
+; CHECK-NEXT: vrgatherei16.vv v8, v16, v24
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vs8r.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 327e18e913819e..4b6ad0f27214d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -122,9 +122,9 @@ define <vscale x 4 x i64> @vector_interleave_nxv4i64_nxv2i64(<vscale x 2 x i64>
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vand.vi v13, v12, 1
+; CHECK-NEXT: vmsne.vi v0, v13, 0
; CHECK-NEXT: vsrl.vi v16, v12, 1
-; CHECK-NEXT: vand.vi v12, v12, 1
-; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
@@ -137,9 +137,9 @@ define <vscale x 4 x i64> @vector_interleave_nxv4i64_nxv2i64(<vscale x 2 x i64>
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; ZVBB-NEXT: vid.v v12
+; ZVBB-NEXT: vand.vi v13, v12, 1
+; ZVBB-NEXT: vmsne.vi v0, v13, 0
; ZVBB-NEXT: vsrl.vi v16, v12, 1
-; ZVBB-NEXT: vand.vi v12, v12, 1
-; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t
; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16
@@ -288,44 +288,32 @@ define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv8r.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vsrl.vi v6, v24, 1
-; CHECK-NEXT: vand.vi v8, v24, 1
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vx v6, v6, a0, v0.t
+; CHECK-NEXT: vand.vi v26, v24, 1
+; CHECK-NEXT: vmsne.vi v10, v26, 0
+; CHECK-NEXT: vsrl.vi v8, v24, 1
+; CHECK-NEXT: vmv8r.v v24, v0
+; CHECK-NEXT: vmv4r.v v12, v4
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v8, v24, v6
+; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v6
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -335,44 +323,32 @@ define <vscale x 16 x i64> @vector_interleave_nxv16i64_nxv8i64(<vscale x 8 x i64
; ZVBB-NEXT: addi sp, sp, -16
; ZVBB-NEXT: .cfi_def_cfa_offset 16
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 4
-; ZVBB-NEXT: sub sp, sp, a0
-; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 3
-; ZVBB-NEXT: add a0, sp, a0
-; ZVBB-NEXT: addi a0, a0, 16
-; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVBB-NEXT: sub sp, sp, a0
+; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVBB-NEXT: vmv8r.v v0, v8
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: srli a0, a0, 1
; ZVBB-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; ZVBB-NEXT: vid.v v24
-; ZVBB-NEXT: vsrl.vi v6, v24, 1
-; ZVBB-NEXT: vand.vi v8, v24, 1
-; ZVBB-NEXT: vmsne.vi v0, v8, 0
-; ZVBB-NEXT: csrr a1, vlenb
-; ZVBB-NEXT: slli a1, a1, 3
-; ZVBB-NEXT: add a1, sp, a1
-; ZVBB-NEXT: addi a1, a1, 16
-; ZVBB-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVBB-NEXT: vadd.vx v6, v6, a0, v0.t
+; ZVBB-NEXT: vand.vi v26, v24, 1
+; ZVBB-NEXT: vmsne.vi v10, v26, 0
+; ZVBB-NEXT: vsrl.vi v8, v24, 1
+; ZVBB-NEXT: vmv8r.v v24, v0
+; ZVBB-NEXT: vmv4r.v v12, v4
+; ZVBB-NEXT: vmv1r.v v0, v10
+; ZVBB-NEXT: vadd.vx v8, v8, a0, v0.t
; ZVBB-NEXT: vmv4r.v v28, v16
; ZVBB-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; ZVBB-NEXT: vrgatherei16.vv v8, v24, v6
+; ZVBB-NEXT: vrgatherei16.vv v0, v24, v8
; ZVBB-NEXT: addi a0, sp, 16
-; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 3
-; ZVBB-NEXT: add a0, sp, a0
-; ZVBB-NEXT: addi a0, a0, 16
-; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVBB-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; ZVBB-NEXT: vmv4r.v v16, v12
-; ZVBB-NEXT: vrgatherei16.vv v24, v16, v6
-; ZVBB-NEXT: addi a0, sp, 16
+; ZVBB-NEXT: vrgatherei16.vv v24, v16, v8
; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVBB-NEXT: vmv.v.v v16, v24
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 4
+; ZVBB-NEXT: slli a0, a0, 3
; ZVBB-NEXT: add sp, sp, a0
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: ret
@@ -516,9 +492,9 @@ define <vscale x 4 x double> @vector_interleave_nxv4f64_nxv2f64(<vscale x 2 x do
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vand.vi v13, v12, 1
+; CHECK-NEXT: vmsne.vi v0, v13, 0
; CHECK-NEXT: vsrl.vi v16, v12, 1
-; CHECK-NEXT: vand.vi v12, v12, 1
-; CHECK-NEXT: vmsne.vi v0, v12, 0
; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
@@ -531,9 +507,9 @@ define <vscale x 4 x double> @vector_interleave_nxv4f64_nxv2f64(<vscale x 2 x do
; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; ZVBB-NEXT: vid.v v12
+; ZVBB-NEXT: vand.vi v13, v12, 1
+; ZVBB-NEXT: vmsne.vi v0, v13, 0
; ZVBB-NEXT: vsrl.vi v16, v12, 1
-; ZVBB-NEXT: vand.vi v12, v12, 1
-; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t
; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16
@@ -612,44 +588,32 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv8r.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
-; CHECK-NEXT: vsrl.vi v6, v24, 1
-; CHECK-NEXT: vand.vi v8, v24, 1
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vx v6, v6, a0, v0.t
+; CHECK-NEXT: vand.vi v26, v24, 1
+; CHECK-NEXT: vmsne.vi v10, v26, 0
+; CHECK-NEXT: vsrl.vi v8, v24, 1
+; CHECK-NEXT: vmv8r.v v24, v0
+; CHECK-NEXT: vmv4r.v v12, v4
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vrgatherei16.vv v8, v24, v6
+; CHECK-NEXT: vrgatherei16.vv v0, v24, v8
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v16, v12
-; CHECK-NEXT: vrgatherei16.vv v24, v16, v6
-; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vrgatherei16.vv v24, v16, v8
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -659,44 +623,32 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv8f64(<vscale x 8 x
; ZVBB-NEXT: addi sp, sp, -16
; ZVBB-NEXT: .cfi_def_cfa_offset 16
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 4
-; ZVBB-NEXT: sub sp, sp, a0
-; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: slli a0, a0, 3
-; ZVBB-NEXT: add a0, sp, a0
-; ZVBB-NEXT: addi a0, a0, 16
-; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVBB-NEXT: sub sp, sp, a0
+; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVBB-NEXT: vmv8r.v v0, v8
; ZVBB-NEXT: csrr a0, vlenb
; ZVBB-NEXT: srli a0, a0, 1
; ZVBB-NEXT: vsetvli a1, zero, e16, m2, ta, mu
; ZVBB-NEXT: vid.v v24
-; ZVBB-NEXT: vsrl.vi v6, v24, 1
-; ZVBB-NEXT: vand.vi v8, v24, 1
-; ZVBB-NEXT: vmsne.vi v0, v8, 0
-; ZVBB-NEXT: csrr a1, vlenb
-; ZVBB-NEXT: slli a1, a1, 3
-; ZVBB-NEXT: add a1, sp, a1
-; ZVBB-NEXT: addi a1, a1, 16
-; ZVBB-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
-; ZVBB-NEXT: vadd.vx v6, v6, a0, v0.t
+; ZVBB-NEXT: vand.vi v26, v24, 1
+; ZVBB-NEXT: vmsne.vi v10, v26, 0
+; ZVBB-NEXT: vsrl.vi v8, v24, 1
+; ZVBB-NEXT: vmv8r.v v24, v0
+; ZVBB-NEXT: vmv4r.v v12, v4
+; ZVBB-NEXT: vmv1r.v v0, v10
+; ZVBB-NEXT: vadd.vx v8, v8, a0, v0.t
; ZVBB-NEXT: vmv4r.v v28, v16
; ZVBB-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; ZVBB-NEXT: vrgatherei16.vv v8, v24, v6
+; ZVBB-NEXT: vrgatherei16.vv v0, v24, v8
; ZVBB-NEXT: addi a0, sp, 16
-; ZVBB-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 3
-; ZVBB-NEXT: add a0, sp, a0
-; ZVBB-NEXT: addi a0, a0, 16
-; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVBB-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; ZVBB-NEXT: vmv4r.v v16, v12
-; ZVBB-NEXT: vrgatherei16.vv v24, v16, v6
-; ZVBB-NEXT: addi a0, sp, 16
+; ZVBB-NEXT: vrgatherei16.vv v24, v16, v8
; ZVBB-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVBB-NEXT: vmv.v.v v16, v24
; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 4
+; ZVBB-NEXT: slli a0, a0, 3
; ZVBB-NEXT: add sp, sp, a0
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
index e7184921d87a08..ffeb399291e1f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -187,11 +190,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v10
-; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfeq.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16(
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v12
-; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfeq.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16(
@@ -289,9 +294,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -340,9 +346,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -391,11 +398,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v10
-; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfeq.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32(
@@ -442,11 +450,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v12
-; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfeq.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32(
@@ -493,9 +502,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -544,11 +554,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v10
-; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfeq.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfeq.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64(
@@ -595,11 +606,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfeq.vv v0, v8, v12
-; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfeq.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
index a6dad9eaa4f358..993b50a1c81ce8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -187,11 +190,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v10, v8
-; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmfle.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16(
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v12, v8
-; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmfle.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16(
@@ -289,9 +294,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -340,9 +346,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -391,11 +398,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v10, v8
-; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmfle.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32(
@@ -442,11 +450,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v12, v8
-; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmfle.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32(
@@ -493,9 +502,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -544,11 +554,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v10, v8
-; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmfle.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfle.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64(
@@ -595,11 +606,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v12, v8
-; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmfle.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
index f643a4036381c3..427f0eb28e7df5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -187,11 +190,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v10, v8
-; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmflt.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16(
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v12, v8
-; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmflt.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16(
@@ -289,9 +294,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -340,9 +346,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -391,11 +398,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v10, v8
-; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmflt.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32(
@@ -442,11 +450,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v12, v8
-; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmflt.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32(
@@ -493,9 +502,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v9, v8
+; CHECK-NEXT: vmflt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -544,11 +554,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v10, v8
-; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmflt.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64(
@@ -595,11 +606,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v12, v8
-; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmflt.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
index 6c52364c1fbd56..e5327632fc04f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -187,11 +190,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v10
-; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfle.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16(
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v12
-; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfle.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16(
@@ -289,9 +294,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -340,9 +346,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -391,11 +398,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v10
-; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfle.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32(
@@ -442,11 +450,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v12
-; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfle.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32(
@@ -493,9 +502,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v9
+; CHECK-NEXT: vmfle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -544,11 +554,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v10
-; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfle.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfle.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64(
@@ -595,11 +606,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfle.vv v0, v8, v12
-; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfle.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
index 37a9c6b081a1df..64f257e355ceae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -187,11 +190,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v10
-; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmflt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16(
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v12
-; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmflt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16(
@@ -289,9 +294,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -340,9 +346,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -391,11 +398,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v10
-; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmflt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32(
@@ -442,11 +450,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v12
-; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmflt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32(
@@ -493,9 +502,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v9
+; CHECK-NEXT: vmflt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -544,11 +554,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v10
-; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmflt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmflt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64(
@@ -595,11 +606,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vv v0, v8, v12
-; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmflt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
index 5defce42091e55..6f6a2a5e8783c6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f16(
define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f16(
define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f16(
define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -187,11 +190,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16(
define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v10
-; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16(
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16(
define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v12
-; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfne.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16(
@@ -289,9 +294,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f32(
define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -340,9 +346,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f32(
define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -391,11 +398,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32(
define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v10
-; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32(
@@ -442,11 +450,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32(
define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v12
-; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfne.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32(
@@ -493,9 +502,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f64(
define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v9
+; CHECK-NEXT: vmfne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -544,11 +554,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64(
define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v10
-; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmfne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmfne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64(
@@ -595,11 +606,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64(
define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmfne.vv v0, v8, v12
-; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmfne.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
index cc6c1f585bb7d8..da1c751b566304 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v10
-; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmseq.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v12
-; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmseq.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v10
-; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmseq.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v12
-; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmseq.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v10
-; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmseq.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v12
-; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmseq.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v9
+; CHECK-NEXT: vmseq.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v10
-; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmseq.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmseq.vv v0, v8, v12
-; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmseq.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index c8f9b60a3f2da6..502fb9b24148f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v10, v8
-; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsle.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v12, v8
-; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsle.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v10, v8
-; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsle.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v12, v8
-; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsle.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v10, v8
-; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsle.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v12, v8
-; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsle.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v9, v8
+; CHECK-NEXT: vmsle.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v10, v8
-; CHECK-NEXT: vmsle.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsle.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v12, v8
-; CHECK-NEXT: vmsle.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsle.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index b6c6d9e90f6109..9410a99d81423f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v10, v8
-; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v12, v8
-; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v10, v8
-; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v12, v8
-; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v10, v8
-; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v12, v8
-; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
+; CHECK-NEXT: vmsleu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v10, v8
-; CHECK-NEXT: vmsleu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v12, v8
-; CHECK-NEXT: vmsleu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
index dfd7096a65ebb9..b7a676e7f2dd37 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v10, v8
-; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmslt.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v12, v8
-; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmslt.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsgt.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v10, v8
-; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmslt.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v12, v8
-; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmslt.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v10, v8
-; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmslt.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v12, v8
-; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmslt.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgt.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v9, v8
+; CHECK-NEXT: vmslt.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v10, v8
-; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmslt.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v12, v8
-; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmslt.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
index 8826be03bbebb8..88a632de067a68 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v10, v8
-; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v12, v8
-; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v10, v8
-; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v12, v8
-; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v10, v8
-; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v12, v8
-; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v9, v8
+; CHECK-NEXT: vmsltu.vv v8, v9, v8
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v10, v8
-; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v10, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v12, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v12, v8
-; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v12, v8
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
index 5d5a28edbfe151..2248ba03adfe79 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v10
-; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsle.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v12
-; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsle.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsle.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v10
-; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsle.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v12
-; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsle.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v10
-; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsle.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v12
-; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsle.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsle.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v9
+; CHECK-NEXT: vmsle.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v10
-; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsle.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsle.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsle.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v0, v8, v12
-; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsle.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
index c58ac2d0718314..57bae83b25e0e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v10
-; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v12
-; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsleu.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v10
-; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v12
-; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v10
-; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v12
-; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsleu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v9
+; CHECK-NEXT: vmsleu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v10
-; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsleu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsleu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsleu.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v8, v12
-; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsleu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
index 6c6e580b043d1a..6783f7feb624c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v10
-; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmslt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v12
-; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmslt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmslt.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v10
-; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmslt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v12
-; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmslt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v10
-; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmslt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v12
-; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmslt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmslt.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v9
+; CHECK-NEXT: vmslt.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v10
-; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmslt.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmslt.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmslt.vv v0, v8, v12
-; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmslt.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
index 76f3e449ab58f5..b082b735a02072 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v10
-; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v12
-; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsltu.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v10
-; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v12
-; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v10
-; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v12
-; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsltu.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v9
+; CHECK-NEXT: vmsltu.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v10
-; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsltu.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsltu.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsltu.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsltu.vv v0, v8, v12
-; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsltu.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
index 161c1bc4314fcb..bb4575e5d72cbe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
@@ -34,9 +34,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i8(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -85,9 +86,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i8(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -136,9 +138,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i8(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -187,9 +190,10 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i8(
define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -238,11 +242,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8(
define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v10
-; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i8(
@@ -289,11 +294,12 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8(
define <vscale x 32 x i1> @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v12
-; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsne.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 32 x i1> @llvm.riscv.vmsne.nxv32i8(
@@ -340,9 +346,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i16(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -391,9 +398,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i16(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -442,9 +450,10 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i16(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -493,11 +502,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16(
define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v10
-; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i16(
@@ -544,11 +554,12 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16(
define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v12
-; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsne.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i16(
@@ -595,9 +606,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i32(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -646,9 +658,10 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i32(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -697,11 +710,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v10
-; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32(
@@ -748,11 +762,12 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32(
define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v12
-; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsne.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i32(
@@ -799,9 +814,10 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsne.mask.nxv1i64(
define <vscale x 1 x i1> @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v11, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v9
+; CHECK-NEXT: vmsne.vv v8, v8, v9
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv.v.v v0, v11
; CHECK-NEXT: ret
@@ -850,11 +866,12 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64(
define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v14, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v10
-; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t
+; CHECK-NEXT: vmsne.vv v14, v8, v10
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v14
+; CHECK-NEXT: vmsne.vv v8, v10, v12, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 2 x i1> @llvm.riscv.vmsne.nxv2i64(
@@ -901,11 +918,12 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64(
define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmv1r.v v20, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsne.vv v0, v8, v12
-; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t
+; CHECK-NEXT: vmsne.vv v20, v8, v12
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: vmv1r.v v0, v20
+; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index 39f517a100f527..e8620c848f8d3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -o - -mtriple=riscv64 -mattr=v \
-# RUN: -run-pass=riscv-insert-vsetvli,riscv-coalesce-vsetvli | FileCheck %s
+# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s
--- |
source_filename = "vsetvli-insert.ll"
@@ -166,7 +166,7 @@ body: |
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8
%2:gprnox0 = COPY $x11
@@ -208,7 +208,7 @@ body: |
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 %pt, [[COPY1]], $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: %dead:vr = IMPLICIT_DEF
- ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, killed [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY %3
; CHECK-NEXT: PseudoRET implicit $v8
%1:gprnox0 = COPY $x11
@@ -282,8 +282,8 @@ body: |
; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
; CHECK-NEXT: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt2, [[COPY]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.y)
; CHECK-NEXT: %pt3:vr = IMPLICIT_DEF
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt3, [[PseudoVLE64_V_M1_]], [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: PseudoVSE64_V_M1 [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x)
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt3, killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x)
; CHECK-NEXT: PseudoRET
%1:gpr = COPY $x11
%0:gpr = COPY $x10
@@ -328,8 +328,8 @@ body: |
; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], [[PseudoVLE64_V_M1_]], [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S [[PseudoVREDSUM_VS_M1_E8_]], 6 /* e64 */, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVREDSUM_VS_M1_E8_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1_E8 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVMV_X_S:%[0-9]+]]:gpr = PseudoVMV_X_S killed [[PseudoVREDSUM_VS_M1_E8_]], 6 /* e64 */, implicit $vtype
; CHECK-NEXT: $x10 = COPY [[PseudoVMV_X_S]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gpr = COPY $x10
@@ -418,7 +418,7 @@ body: |
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8
%2:gprnox0 = COPY $x11
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index fcd852f1210df5..d0b76e7e4535b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -27,7 +27,7 @@ body: |
; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 3
; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
- ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 152 /* e64, m1, tu, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: $x12 = PseudoReadVLENB