[llvm] [RISCV] Move performCombineVMergeAndVOps to RISCVVectorPeephole (PR #144076)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 09:16:42 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/144076
From 010a8f4d1df77f4addbf0c3051b5eb664739f81f Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 13 Jun 2025 14:28:37 +0100
Subject: [PATCH 1/2] [RISCV] Move performCombineVMergeAndVOps to
RISCVVectorPeephole
This moves the peephole that folds vmerges into their operands into RISCVVectorPeephole. It will also eventually allow us to commute instructions to enable more folding; see #141885 and #70042.
Most of the test diffs are due to a slight change in instruction ordering.
For now doPeepholeMaskedRVV is kept to minimize the diff, even though it's a duplicate of RISCVVectorPeephole::convertToUnmasked. I plan to remove it in a separate patch, since removing it causes some instructions to be shuffled around.
Similarly, this runs foldVMergeToMask before the other peepholes to minimize the diff for now.
rvv-peephole-vmerge-vops-mir.ll was replaced with a dedicated vmerge-peephole.mir test.
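To illustrate, the fold rewrites an unmasked pseudo whose only use is a vmerge into its masked form, roughly as in this sketch (pseudo-MIR, with operand lists, VL/SEW operands and register classes simplified for illustration):

  %true = PseudoVADD_VV_M1 ..., %avl, sew
  %x    = PseudoVMERGE_VVM_M1 %false, %false, %true, %mask, %avl, sew
  ->
  %x    = PseudoVADD_VV_M1_MASK %false, ..., %mask, %avl, sew, policy

The resulting AVL is the minimum of the two VLs, and the policy is relaxed to tail agnostic only if the vmerge's passthru was undef and the VL didn't shrink; otherwise tail undisturbed is kept to preserve elements that moved into the tail.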
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 214 ----------
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 1 -
llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 155 ++++++-
.../RISCV/rvv/combine-reduce-add-to-vcpop.ll | 67 +--
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 36 +-
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 396 +++++++++---------
.../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 80 ++--
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 266 ++++++------
.../fixed-vectors-shuffle-deinterleave2.ll | 192 ++++-----
.../fixed-vectors-shuffle-int-interleave.ll | 55 ++-
.../RISCV/rvv/fixed-vectors-shuffle-int.ll | 23 +-
.../RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll | 45 --
.../RISCV/rvv/vector-deinterleave-fixed.ll | 22 +-
.../CodeGen/RISCV/rvv/vmerge-peephole.mir | 57 +++
llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 14 +-
15 files changed, 785 insertions(+), 838 deletions(-)
delete mode 100644 llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4539efd591c8b..604b924e94b66 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -163,8 +163,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
CurDAG->setRoot(Dummy.getValue());
- MadeChange |= doPeepholeMergeVVMFold();
-
// After we're done with everything else, convert IMPLICIT_DEF
// passthru operands to NoRegister. This is required to workaround
// an optimization deficiency in MachineCSE. This really should
@@ -4069,218 +4067,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
return true;
}
-static bool IsVMerge(SDNode *N) {
- return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
-}
-
-// Try to fold away VMERGE_VVM instructions into their true operands:
-//
-// %true = PseudoVADD_VV ...
-// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
-// ->
-// %x = PseudoVADD_VV_MASK %false, ..., %mask
-//
-// We can only fold if vmerge's passthru operand, vmerge's false operand and
-// %true's passthru operand (if it has one) are the same. This is because we
-// have to consolidate them into one passthru operand in the result.
-//
-// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
-// mask is all ones.
-//
-// The resulting VL is the minimum of the two VLs.
-//
-// The resulting policy is the effective policy the vmerge would have had,
-// i.e. whether or not it's passthru operand was implicit-def.
-bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
- SDValue Passthru, False, True, VL, Mask;
- assert(IsVMerge(N));
- Passthru = N->getOperand(0);
- False = N->getOperand(1);
- True = N->getOperand(2);
- Mask = N->getOperand(3);
- VL = N->getOperand(4);
-
- // If the EEW of True is different from vmerge's SEW, then we can't fold.
- if (True.getSimpleValueType() != N->getSimpleValueType(0))
- return false;
-
- // We require that either passthru and false are the same, or that passthru
- // is undefined.
- if (Passthru != False && !isImplicitDef(Passthru))
- return false;
-
- assert(True.getResNo() == 0 &&
- "Expect True is the first output of an instruction.");
-
- // Need N is the exactly one using True.
- if (!True.hasOneUse())
- return false;
-
- if (!True.isMachineOpcode())
- return false;
-
- unsigned TrueOpc = True.getMachineOpcode();
- const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
- uint64_t TrueTSFlags = TrueMCID.TSFlags;
- bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
-
- const RISCV::RISCVMaskedPseudoInfo *Info =
- RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
- if (!Info)
- return false;
-
- // If True has a passthru operand then it needs to be the same as vmerge's
- // False, since False will be used for the result's passthru operand.
- if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
- SDValue PassthruOpTrue = True->getOperand(0);
- if (False != PassthruOpTrue)
- return false;
- }
-
- // Skip if True has side effect.
- if (TII->get(TrueOpc).hasUnmodeledSideEffects())
- return false;
-
- unsigned TrueChainOpIdx = True.getNumOperands() - 1;
- bool HasChainOp =
- True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
-
- if (HasChainOp) {
- // Avoid creating cycles in the DAG. We must ensure that none of the other
- // operands depend on True through it's Chain.
- SmallVector<const SDNode *, 4> LoopWorklist;
- SmallPtrSet<const SDNode *, 16> Visited;
- LoopWorklist.push_back(False.getNode());
- LoopWorklist.push_back(Mask.getNode());
- LoopWorklist.push_back(VL.getNode());
- if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
- return false;
- }
-
- // The vector policy operand may be present for masked intrinsics
- bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
- unsigned TrueVLIndex =
- True.getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
- SDValue TrueVL = True.getOperand(TrueVLIndex);
- SDValue SEW = True.getOperand(TrueVLIndex + 1);
-
- auto GetMinVL = [](SDValue LHS, SDValue RHS) {
- if (LHS == RHS)
- return LHS;
- if (isAllOnesConstant(LHS))
- return RHS;
- if (isAllOnesConstant(RHS))
- return LHS;
- auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
- auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
- if (!CLHS || !CRHS)
- return SDValue();
- return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
- };
-
- // Because N and True must have the same passthru operand (or True's operand
- // is implicit_def), the "effective" body is the minimum of their VLs.
- SDValue OrigVL = VL;
- VL = GetMinVL(TrueVL, VL);
- if (!VL)
- return false;
-
- // Some operations produce different elementwise results depending on the
- // active elements, like viota.m or vredsum. This transformation is illegal
- // for these if we change the active elements (i.e. mask or VL).
- const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
- if (RISCVII::elementsDependOnVL(TrueBaseMCID.TSFlags) && (TrueVL != VL))
- return false;
- if (RISCVII::elementsDependOnMask(TrueBaseMCID.TSFlags) &&
- (Mask && !usesAllOnesMask(Mask)))
- return false;
-
- // Make sure it doesn't raise any observable fp exceptions, since changing the
- // active elements will affect how fflags is set.
- if (mayRaiseFPException(True.getNode()) && !True->getFlags().hasNoFPExcept())
- return false;
-
- SDLoc DL(N);
-
- unsigned MaskedOpc = Info->MaskedPseudo;
-#ifndef NDEBUG
- const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
- assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
- "Expected instructions with mask have policy operand.");
- assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
- MCOI::TIED_TO) == 0 &&
- "Expected instructions with mask have a tied dest.");
-#endif
-
- // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
- // operand is undefined.
- //
- // However, if the VL became smaller than what the vmerge had originally, then
- // elements past VL that were previously in the vmerge's body will have moved
- // to the tail. In that case we always need to use tail undisturbed to
- // preserve them.
- bool MergeVLShrunk = VL != OrigVL;
- uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
- ? RISCVVType::TAIL_AGNOSTIC
- : /*TUMU*/ 0;
- SDValue PolicyOp =
- CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
-
-
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(False);
-
- const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
- const unsigned NormalOpsEnd = TrueVLIndex - HasRoundingMode;
- Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
-
- Ops.push_back(Mask);
-
- // For unmasked "VOp" with rounding mode operand, that is interfaces like
- // (..., rm, vl) or (..., rm, vl, policy).
- // Its masked version is (..., vm, rm, vl, policy).
- // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
- if (HasRoundingMode)
- Ops.push_back(True->getOperand(TrueVLIndex - 1));
-
- Ops.append({VL, SEW, PolicyOp});
-
- // Result node should have chain operand of True.
- if (HasChainOp)
- Ops.push_back(True.getOperand(TrueChainOpIdx));
-
- MachineSDNode *Result =
- CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
- Result->setFlags(True->getFlags());
-
- if (!cast<MachineSDNode>(True)->memoperands_empty())
- CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
-
- // Replace vmerge.vvm node by Result.
- ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
-
- // Replace another value of True. E.g. chain and VL.
- for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
- ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
-
- return true;
-}
-
-bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
- bool MadeChange = false;
- SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
-
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- if (N->use_empty() || !N->isMachineOpcode())
- continue;
-
- if (IsVMerge(N))
- MadeChange |= performCombineVMergeAndVOps(N);
- }
- return MadeChange;
-}
-
/// If our passthru is an implicit_def, use noreg instead. This side
/// steps issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index cb63c21fd8fc9..59c1b48282f5b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -199,7 +199,6 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
private:
bool doPeepholeSExtW(SDNode *Node);
bool doPeepholeMaskedRVV(MachineSDNode *Node);
- bool doPeepholeMergeVVMFold();
bool doPeepholeNoRegPassThru();
bool performCombineVMergeAndVOps(SDNode *N);
bool selectImm64IfCheaper(int64_t Imm, int64_t OrigImm, SDValue N,
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index c9c2413d009b7..69c288c7a2e7a 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -67,12 +67,13 @@ class RISCVVectorPeephole : public MachineFunctionPass {
bool convertSameMaskVMergeToVMv(MachineInstr &MI);
bool foldUndefPassthruVMV_V_V(MachineInstr &MI);
bool foldVMV_V_V(MachineInstr &MI);
+ bool foldVMergeToMask(MachineInstr &MI) const;
bool hasSameEEW(const MachineInstr &User, const MachineInstr &Src) const;
bool isAllOnesMask(const MachineInstr *MaskDef) const;
std::optional<unsigned> getConstant(const MachineOperand &VL) const;
bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const;
- bool isKnownSameDefs(const MachineOperand &A, const MachineOperand &B) const;
+ bool isKnownSameDefs(Register A, Register B) const;
};
} // namespace
@@ -380,13 +381,23 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
return true;
}
-bool RISCVVectorPeephole::isKnownSameDefs(const MachineOperand &A,
- const MachineOperand &B) const {
- if (A.getReg().isPhysical() || B.getReg().isPhysical())
+bool RISCVVectorPeephole::isKnownSameDefs(Register A, Register B) const {
+ if (A.isPhysical() || B.isPhysical())
return false;
- return TRI->lookThruCopyLike(A.getReg(), MRI) ==
- TRI->lookThruCopyLike(B.getReg(), MRI);
+ auto LookThruVirtRegCopies = [this](Register Reg) {
+ while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) {
+ if (!Def->isFullCopy())
+ break;
+ Register Src = Def->getOperand(1).getReg();
+ if (!Src.isVirtual())
+ break;
+ Reg = Src;
+ }
+ return Reg;
+ };
+
+ return LookThruVirtRegCopies(A) == LookThruVirtRegCopies(B);
}
/// If a PseudoVMERGE_VVM's true operand is a masked pseudo and both have the
@@ -414,7 +425,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
const MachineOperand &TrueMask =
True->getOperand(TrueMaskedInfo->MaskOpIdx + True->getNumExplicitDefs());
const MachineOperand &MIMask = MI.getOperand(4);
- if (!isKnownSameDefs(TrueMask, MIMask))
+ if (!isKnownSameDefs(TrueMask.getReg(), MIMask.getReg()))
return false;
// True's passthru needs to be equivalent to False
@@ -663,6 +674,133 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
return true;
}
+/// Try to fold away VMERGE_VVM instructions into their operands:
+///
+/// %true = PseudoVADD_VV ...
+/// %x = PseudoVMERGE_VVM_M1 %false, %false, %true, %mask
+/// ->
+/// %x = PseudoVADD_VV_M1_MASK %false, ..., %mask
+///
+/// We can only fold if vmerge's passthru operand, vmerge's false operand and
+/// %true's passthru operand (if it has one) are the same. This is because we
+/// have to consolidate them into one passthru operand in the result.
+///
+/// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
+/// mask is all ones.
+///
+/// The resulting VL is the minimum of the two VLs.
+///
+/// The resulting policy is the effective policy the vmerge would have had,
+/// i.e. whether or not its passthru operand was implicit-def.
+bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
+ if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMERGE_VVM)
+ return false;
+
+ Register PassthruReg = MI.getOperand(1).getReg();
+ Register FalseReg = MI.getOperand(2).getReg();
+ Register TrueReg = MI.getOperand(3).getReg();
+ if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg))
+ return false;
+ MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg);
+ if (True.getParent() != MI.getParent())
+ return false;
+ const MachineOperand &MaskOp = MI.getOperand(4);
+ MachineInstr *Mask = MRI->getUniqueVRegDef(MaskOp.getReg());
+ assert(Mask);
+
+ const RISCV::RISCVMaskedPseudoInfo *Info =
+ RISCV::lookupMaskedIntrinsicByUnmasked(True.getOpcode());
+ if (!Info)
+ return false;
+
+ // If the EEW of True is different from vmerge's SEW, then we can't fold.
+ if (!hasSameEEW(MI, True))
+ return false;
+
+ // We require that either passthru and false are the same, or that passthru
+ // is undefined.
+ if (PassthruReg != RISCV::NoRegister &&
+ !isKnownSameDefs(PassthruReg, FalseReg))
+ return false;
+
+ // If True has a passthru operand then it needs to be the same as vmerge's
+ // False, since False will be used for the result's passthru operand.
+ Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg();
+ if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) &&
+ TruePassthru != RISCV::NoRegister &&
+ !isKnownSameDefs(TruePassthru, FalseReg))
+ return false;
+
+ // Make sure it doesn't raise any observable fp exceptions, since changing the
+ // active elements will affect how fflags is set.
+ if (True.hasUnmodeledSideEffects() || True.mayRaiseFPException())
+ return false;
+
+ const MachineOperand &VMergeVL =
+ MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
+ const MachineOperand &TrueVL =
+ True.getOperand(RISCVII::getVLOpNum(True.getDesc()));
+
+ MachineOperand MinVL = MachineOperand::CreateImm(0);
+ if (RISCV::isVLKnownLE(TrueVL, VMergeVL))
+ MinVL = TrueVL;
+ else if (RISCV::isVLKnownLE(VMergeVL, TrueVL))
+ MinVL = VMergeVL;
+ else
+ return false;
+
+ unsigned RVVTSFlags =
+ TII->get(RISCV::getRVVMCOpcode(True.getOpcode())).TSFlags;
+ if (RISCVII::elementsDependOnVL(RVVTSFlags) && !TrueVL.isIdenticalTo(MinVL))
+ return false;
+ if (RISCVII::elementsDependOnMask(RVVTSFlags) && !isAllOnesMask(Mask))
+ return false;
+
+ // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
+ // operand is undefined.
+ //
+ // However, if the VL became smaller than what the vmerge had originally, then
+ // elements past VL that were previously in the vmerge's body will have moved
+ // to the tail. In that case we always need to use tail undisturbed to
+ // preserve them.
+ uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ if (PassthruReg == RISCV::NoRegister && RISCV::isVLKnownLE(VMergeVL, MinVL))
+ Policy |= RISCVVType::TAIL_AGNOSTIC;
+
+ assert(RISCVII::hasVecPolicyOp(True.getDesc().TSFlags) &&
+ "Foldable unmasked pseudo should have a policy op already");
+
+ // Make sure the mask dominates True, otherwise move True down so it does.
+ // VL will always dominate because if it's a register they need to be the same.
+ if (!ensureDominates(MaskOp, True))
+ return false;
+
+ True.setDesc(TII->get(Info->MaskedPseudo));
+
+ // Insert the mask operand.
+ // TODO: Increment MaskOpIdx by number of explicit defs?
+ True.insert(&True.getOperand(Info->MaskOpIdx + True.getNumExplicitDefs()),
+ MachineOperand::CreateReg(MaskOp.getReg(), false));
+
+ // Update the passthru, AVL and policy.
+ True.getOperand(True.getNumExplicitDefs()).setReg(FalseReg);
+ True.removeOperand(RISCVII::getVLOpNum(True.getDesc()));
+ True.insert(&True.getOperand(RISCVII::getVLOpNum(True.getDesc())), MinVL);
+ True.getOperand(RISCVII::getVecPolicyOpNum(True.getDesc())).setImm(Policy);
+
+ MRI->replaceRegWith(True.getOperand(0).getReg(), MI.getOperand(0).getReg());
+ // Now that True is masked, constrain its operands from vr -> vrnov0.
+ for (MachineOperand &MO : True.explicit_operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ MRI->constrainRegClass(
+ MO.getReg(), True.getRegClassConstraint(MO.getOperandNo(), TII, TRI));
+ }
+ MI.eraseFromParent();
+
+ return true;
+}
+
bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -679,6 +817,9 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : make_early_inc_range(MBB))
+ Changed |= foldVMergeToMask(MI);
+
for (MachineInstr &MI : make_early_inc_range(MBB)) {
Changed |= convertToVLMAX(MI);
Changed |= tryToReduceVL(MI);
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index 5dc532273b770..0d8aff306252e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -313,12 +313,12 @@ define i32 @test_nxv128i1(<vscale x 128 x i1> %x) {
; CHECK-NEXT: vslidedown.vx v0, v6, a0
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v6, v7, a1
-; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v7, a0
; CHECK-NEXT: vslidedown.vx v5, v6, a0
+; CHECK-NEXT: vslidedown.vx v4, v7, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
@@ -364,9 +364,9 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vmv1r.v v7, v9
; CHECK-NEXT: vmv1r.v v5, v8
; CHECK-NEXT: vmv1r.v v4, v0
-; CHECK-NEXT: vmv.v.i v24, 0
+; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: mv a2, a0
@@ -376,7 +376,7 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-NEXT: vmv1r.v v0, v5
-; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
@@ -389,7 +389,7 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vslidedown.vx v2, v5, a0
; CHECK-NEXT: vmv.v.v v0, v3
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v16, 1, v0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: mv a3, a2
@@ -399,41 +399,41 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
; CHECK-NEXT: vmv1r.v v0, v2
-; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v3, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
+; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
-; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a2) # vscale x 64-byte Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v2, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v16, v24, 1, v0
+; CHECK-NEXT: vmerge.vim v24, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v4, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
+; CHECK-NEXT: vmerge.vim v16, v8, 1, v0
; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v5, a1
-; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v24, v24, 1, v0
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v6, a1
; CHECK-NEXT: vslidedown.vx v5, v7, a1
+; CHECK-NEXT: vslidedown.vx v4, v6, a1
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu
-; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v5
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v24
+; CHECK-NEXT: vmv1r.v v0, v5
+; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
+; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # vscale x 64-byte Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
@@ -443,7 +443,7 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: vslidedown.vx v0, v4, a1
; CHECK-NEXT: vslidedown.vx v3, v5, a1
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
-; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
+; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t
; CHECK-NEXT: vmv1r.v v0, v3
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
@@ -451,7 +451,7 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: vadd.vv v8, v8, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
@@ -492,16 +492,16 @@ define i32 @test_nxv256i1(<vscale x 256 x i1> %x) {
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t
-; CHECK-NEXT: vadd.vv v0, v24, v8
+; CHECK-NEXT: vadd.vv v24, v24, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vadd.vv v8, v8, v24
-; CHECK-NEXT: vadd.vv v16, v0, v16
+; CHECK-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vadd.vv v8, v8, v0
+; CHECK-NEXT: vadd.vv v16, v24, v16
; CHECK-NEXT: vadd.vv v8, v16, v8
; CHECK-NEXT: vmv.s.x v16, zero
; CHECK-NEXT: vredsum.vs v8, v8, v16
@@ -537,17 +537,18 @@ entry:
define i16 @test_narrow_nxv64i1(<vscale x 64 x i1> %x) {
; CHECK-LABEL: test_narrow_nxv64i1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v0, a0
+; CHECK-NEXT: vslidedown.vx v8, v0, a0
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
-; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
-; CHECK-NEXT: vmv.s.x v16, zero
-; CHECK-NEXT: vredsum.vs v8, v8, v16
+; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: vredsum.vs v8, v16, v8
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index eb40c133514fe..8837dd6bbc0a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1748,16 +1748,16 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
; CHECK-LABEL: buildvec_v8f32_zvl256:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vfmv.v.f v9, fa4
+; CHECK-NEXT: vfmv.v.f v8, fa4
+; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vmv.v.i v0, 15
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
-; CHECK-NEXT: vfslide1down.vf v9, v9, fa5
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT: vfslide1down.vf v9, v9, fa6
-; CHECK-NEXT: vfslide1down.vf v10, v8, fa3
-; CHECK-NEXT: vfslide1down.vf v8, v9, fa7
-; CHECK-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v9, v9, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vfslide1down.vf v9, v9, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: vfslide1down.vf v9, v9, fa3
+; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x float> poison, float %e0, i64 0
%v1 = insertelement <8 x float> %v0, float %e1, i64 1
@@ -1799,16 +1799,16 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
; CHECK-LABEL: buildvec_v8f64_zvl512:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vfmv.v.f v9, fa4
+; CHECK-NEXT: vfmv.v.f v8, fa4
+; CHECK-NEXT: vfmv.v.f v9, fa0
; CHECK-NEXT: vmv.v.i v0, 15
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
-; CHECK-NEXT: vfslide1down.vf v9, v9, fa5
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT: vfslide1down.vf v9, v9, fa6
-; CHECK-NEXT: vfslide1down.vf v10, v8, fa3
-; CHECK-NEXT: vfslide1down.vf v8, v9, fa7
-; CHECK-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v9, v9, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vfslide1down.vf v9, v9, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: vfslide1down.vf v9, v9, fa3
+; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
; CHECK-NEXT: ret
%v0 = insertelement <8 x double> poison, double %e0, i64 0
%v1 = insertelement <8 x double> %v0, double %e1, i64 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 1fa96d3c07ca9..18ef49f754760 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1359,23 +1359,23 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RV32-ONLY-NEXT: lbu s0, 14(a0)
; RV32-ONLY-NEXT: lbu a0, 15(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-ONLY-NEXT: vmv.v.x v8, a1
-; RV32-ONLY-NEXT: vmv.v.x v9, t1
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t2
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t3
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t4
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t5
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t6
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, s0
-; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0
-; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0
-; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-ONLY-NEXT: vmv.v.x v8, t1
+; RV32-ONLY-NEXT: vmv.v.x v9, a1
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0
+; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-ONLY-NEXT: .cfi_restore s0
; RV32-ONLY-NEXT: addi sp, sp, 16
@@ -1494,23 +1494,23 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RV64V-ONLY-NEXT: lbu s0, 14(a0)
; RV64V-ONLY-NEXT: lbu a0, 15(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64V-ONLY-NEXT: vmv.v.x v8, a1
-; RV64V-ONLY-NEXT: vmv.v.x v9, t1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t2
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t3
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t4
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t5
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t6
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, s0
-; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0
-; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: vmv.v.x v8, t1
+; RV64V-ONLY-NEXT: vmv.v.x v9, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64V-ONLY-NEXT: .cfi_restore s0
; RV64V-ONLY-NEXT: addi sp, sp, 16
@@ -1631,23 +1631,23 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RV64ZVE32-NEXT: lbu s0, 14(a0)
; RV64ZVE32-NEXT: lbu a0, 15(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64ZVE32-NEXT: vmv.v.x v8, a1
-; RV64ZVE32-NEXT: vmv.v.x v9, t1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t2
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t3
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t4
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t5
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t6
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, s0
-; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: vmv.v.x v8, t1
+; RV64ZVE32-NEXT: vmv.v.x v9, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0
+; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64ZVE32-NEXT: .cfi_restore s0
; RV64ZVE32-NEXT: addi sp, sp, 16
@@ -1733,23 +1733,23 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV32-ONLY-NEXT: lbu s0, 124(a0)
; RV32-ONLY-NEXT: lbu a0, 144(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-ONLY-NEXT: vmv.v.x v8, a1
-; RV32-ONLY-NEXT: vmv.v.x v9, t1
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t5
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t6
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t3
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, s0
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t4
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0
-; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0
-; RV32-ONLY-NEXT: vslide1down.vx v8, v9, t2
-; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-ONLY-NEXT: vmv.v.x v8, t1
+; RV32-ONLY-NEXT: vmv.v.x v9, a1
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0
+; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-ONLY-NEXT: .cfi_restore s0
; RV32-ONLY-NEXT: addi sp, sp, 16
@@ -1868,23 +1868,23 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV64V-ONLY-NEXT: lbu s0, 124(a0)
; RV64V-ONLY-NEXT: lbu a0, 144(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64V-ONLY-NEXT: vmv.v.x v8, a1
-; RV64V-ONLY-NEXT: vmv.v.x v9, t1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t5
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t6
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t3
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, s0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t4
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0
-; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, t2
-; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: vmv.v.x v8, t1
+; RV64V-ONLY-NEXT: vmv.v.x v9, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64V-ONLY-NEXT: .cfi_restore s0
; RV64V-ONLY-NEXT: addi sp, sp, 16
@@ -2013,23 +2013,23 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV64ZVE32-NEXT: lbu s0, 124(a0)
; RV64ZVE32-NEXT: lbu a0, 144(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64ZVE32-NEXT: vmv.v.x v8, a1
-; RV64ZVE32-NEXT: vmv.v.x v9, t1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t5
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t6
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t3
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, s0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t4
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0
-; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v9, t2
-; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: vmv.v.x v8, t1
+; RV64ZVE32-NEXT: vmv.v.x v9, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0
+; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64ZVE32-NEXT: .cfi_restore s0
; RV64ZVE32-NEXT: addi sp, sp, 16
@@ -2505,17 +2505,17 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RV32-ONLY-NEXT: lbu t0, 105(a0)
; RV32-ONLY-NEXT: lbu a0, 161(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-ONLY-NEXT: vmv.v.x v8, a2
-; RV32-ONLY-NEXT: vmv.v.x v9, a6
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0
-; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5
-; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4
-; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-ONLY-NEXT: vmv.v.x v8, a6
+; RV32-ONLY-NEXT: vmv.v.x v9, a2
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a1
+; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 4
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_edges:
@@ -2592,17 +2592,17 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RV64V-ONLY-NEXT: lbu t0, 105(a0)
; RV64V-ONLY-NEXT: lbu a0, 161(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64V-ONLY-NEXT: vmv.v.x v8, a2
-; RV64V-ONLY-NEXT: vmv.v.x v9, a6
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0
-; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5
-; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4
-; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: vmv.v.x v8, a6
+; RV64V-ONLY-NEXT: vmv.v.x v9, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a1
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 4
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_edges:
@@ -2679,17 +2679,17 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RV64ZVE32-NEXT: lbu t0, 105(a0)
; RV64ZVE32-NEXT: lbu a0, 161(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64ZVE32-NEXT: vmv.v.x v8, a2
-; RV64ZVE32-NEXT: vmv.v.x v9, a6
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0
-; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5
-; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4
-; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: vmv.v.x v8, a6
+; RV64ZVE32-NEXT: vmv.v.x v9, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a1
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
+; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ret
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
@@ -2740,21 +2740,21 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RV32-ONLY-NEXT: lbu t1, 144(a0)
; RV32-ONLY-NEXT: lbu a0, 154(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-ONLY-NEXT: vmv.v.x v8, a1
-; RV32-ONLY-NEXT: vmv.v.x v9, a6
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7
+; RV32-ONLY-NEXT: vmv.v.x v8, a6
+; RV32-ONLY-NEXT: vmv.v.x v9, a1
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV32-ONLY-NEXT: vslidedown.vi v9, v9, 2
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV32-ONLY-NEXT: vslidedown.vi v9, v9, 1
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t1
-; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5
-; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0
-; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t1
+; RV32-ONLY-NEXT: vslidedown.vi v9, v9, 1
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_loads_undef_scattered:
@@ -2834,21 +2834,21 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RV64V-ONLY-NEXT: lbu t1, 144(a0)
; RV64V-ONLY-NEXT: lbu a0, 154(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64V-ONLY-NEXT: vmv.v.x v8, a1
-; RV64V-ONLY-NEXT: vmv.v.x v9, a6
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7
+; RV64V-ONLY-NEXT: vmv.v.x v8, a6
+; RV64V-ONLY-NEXT: vmv.v.x v9, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV64V-ONLY-NEXT: vslidedown.vi v9, v9, 2
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV64V-ONLY-NEXT: vslidedown.vi v9, v9, 1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t1
-; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0
-; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t1
+; RV64V-ONLY-NEXT: vslidedown.vi v9, v9, 1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered:
@@ -2930,21 +2930,21 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RV64ZVE32-NEXT: lbu t1, 144(a0)
; RV64ZVE32-NEXT: lbu a0, 154(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64ZVE32-NEXT: vmv.v.x v8, a1
-; RV64ZVE32-NEXT: vmv.v.x v9, a6
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7
+; RV64ZVE32-NEXT: vmv.v.x v8, a6
+; RV64ZVE32-NEXT: vmv.v.x v9, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t1
-; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5
-; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t1
+; RV64ZVE32-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
+; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
@@ -3002,16 +3002,16 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %
; RV32-ONLY-LABEL: buildvec_v8i8_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-ONLY-NEXT: vmv.v.x v8, a0
-; RV32-ONLY-NEXT: vmv.v.x v9, a4
+; RV32-ONLY-NEXT: vmv.v.x v8, a4
+; RV32-ONLY-NEXT: vmv.v.x v9, a0
; RV32-ONLY-NEXT: vmv.v.i v0, 15
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6
-; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a3
-; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a7
-; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a1
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v8i8_pack:
@@ -3055,16 +3055,16 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %
; RV64V-ONLY-LABEL: buildvec_v8i8_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64V-ONLY-NEXT: vmv.v.x v8, a0
-; RV64V-ONLY-NEXT: vmv.v.x v9, a4
+; RV64V-ONLY-NEXT: vmv.v.x v8, a4
+; RV64V-ONLY-NEXT: vmv.v.x v9, a0
; RV64V-ONLY-NEXT: vmv.v.i v0, 15
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6
-; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a3
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a7
-; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v8i8_pack:
@@ -3110,16 +3110,16 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %
; RV64ZVE32-LABEL: buildvec_v8i8_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64ZVE32-NEXT: vmv.v.x v8, a0
-; RV64ZVE32-NEXT: vmv.v.x v9, a4
+; RV64ZVE32-NEXT: vmv.v.x v8, a4
+; RV64ZVE32-NEXT: vmv.v.x v9, a0
; RV64ZVE32-NEXT: vmv.v.i v0, 15
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6
-; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a3
-; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a7
-; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i8> poison, i8 %e1, i32 0
%v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index c29ccd45528b8..3bfe41337a110 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -280,16 +280,16 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vslide1down.vx v9, v8, a0
-; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vslide1down.vx v9, v8, a3
+; CHECK-NEXT: li a3, 1
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, zero
; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vslide1down.vx v9, v9, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vslide1down.vx v9, v9, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
-; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vslide1down.vx v9, v9, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vand.vi v8, v9, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
;
@@ -297,16 +297,16 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) {
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; ZVE32F-NEXT: vmv.v.x v8, a0
-; ZVE32F-NEXT: vslide1down.vx v9, v8, a0
-; ZVE32F-NEXT: li a0, 1
+; ZVE32F-NEXT: vslide1down.vx v9, v8, a3
+; ZVE32F-NEXT: li a3, 1
; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, zero
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a0
-; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
-; ZVE32F-NEXT: vand.vi v8, v8, 1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT: vand.vi v8, v9, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
; ZVE32F-NEXT: ret
%1 = insertelement <8 x i1> poison, i1 %x, i32 0
@@ -325,16 +325,16 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vslide1down.vx v9, v8, a0
-; CHECK-NEXT: li a0, 1
+; CHECK-NEXT: vslide1down.vx v9, v8, a3
+; CHECK-NEXT: li a3, 1
; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v9, v9, zero
; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vslide1down.vx v9, v9, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, zero
-; CHECK-NEXT: vslide1down.vx v9, v9, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
-; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vslide1down.vx v9, v9, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vand.vi v8, v9, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
;
@@ -342,16 +342,16 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %
; ZVE32F: # %bb.0:
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; ZVE32F-NEXT: vmv.v.x v8, a0
-; ZVE32F-NEXT: vslide1down.vx v9, v8, a0
-; ZVE32F-NEXT: li a0, 1
+; ZVE32F-NEXT: vslide1down.vx v9, v8, a3
+; ZVE32F-NEXT: li a3, 1
; ZVE32F-NEXT: vmv.v.i v0, 15
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT: vslide1down.vx v9, v9, zero
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a0
-; ZVE32F-NEXT: vslide1down.vx v8, v8, zero
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
-; ZVE32F-NEXT: vand.vi v8, v8, 1
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT: vand.vi v8, v9, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
; ZVE32F-NEXT: ret
%1 = insertelement <8 x i1> poison, i1 %x, i32 0
@@ -371,14 +371,14 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vmv.v.i v0, 15
-; CHECK-NEXT: vslide1down.vx v9, v8, a0
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v9, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v9, v9, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; CHECK-NEXT: vand.vi v8, v9, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
;
@@ -387,14 +387,14 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize {
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; ZVE32F-NEXT: vmv.v.x v8, a0
; ZVE32F-NEXT: vmv.v.i v0, 15
-; ZVE32F-NEXT: vslide1down.vx v9, v8, a0
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v9, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
-; ZVE32F-NEXT: vand.vi v8, v8, 1
+; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVE32F-NEXT: vand.vi v8, v9, 1
; ZVE32F-NEXT: vmsne.vi v0, v8, 0
; ZVE32F-NEXT: ret
%1 = insertelement <8 x i1> poison, i1 %x, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 533b8b6864ebc..67d55366674f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -14055,45 +14055,45 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV32-NEXT: vmv.x.s t0, v8
; RV32-NEXT: lbu t1, 0(a1)
; RV32-NEXT: lbu a1, 1(a1)
-; RV32-NEXT: lbu t2, 0(a2)
-; RV32-NEXT: lbu a2, 1(a2)
; RV32-NEXT: slli a0, a0, 8
; RV32-NEXT: or a0, a0, a6
-; RV32-NEXT: lbu a6, 0(a3)
-; RV32-NEXT: lbu a3, 1(a3)
+; RV32-NEXT: lbu a6, 0(a2)
+; RV32-NEXT: lbu a2, 1(a2)
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: or a1, a1, t1
-; RV32-NEXT: lbu t1, 0(a4)
-; RV32-NEXT: lbu a4, 1(a4)
+; RV32-NEXT: lbu t1, 0(a3)
+; RV32-NEXT: lbu a3, 1(a3)
; RV32-NEXT: slli a2, a2, 8
-; RV32-NEXT: or a2, a2, t2
-; RV32-NEXT: lbu t2, 0(a5)
-; RV32-NEXT: lbu a5, 1(a5)
+; RV32-NEXT: or a2, a2, a6
+; RV32-NEXT: lbu a6, 0(a4)
+; RV32-NEXT: lbu a4, 1(a4)
; RV32-NEXT: slli a3, a3, 8
-; RV32-NEXT: or a3, a3, a6
+; RV32-NEXT: or a3, a3, t1
+; RV32-NEXT: lbu t1, 0(a5)
+; RV32-NEXT: lbu a5, 1(a5)
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: or a4, a4, a6
; RV32-NEXT: lbu a6, 0(a7)
; RV32-NEXT: lbu a7, 1(a7)
-; RV32-NEXT: slli a4, a4, 8
-; RV32-NEXT: or a4, a4, t1
+; RV32-NEXT: slli a5, a5, 8
+; RV32-NEXT: or a5, a5, t1
; RV32-NEXT: lbu t1, 0(t0)
; RV32-NEXT: lbu t0, 1(t0)
-; RV32-NEXT: slli a5, a5, 8
-; RV32-NEXT: or a5, a5, t2
; RV32-NEXT: slli a7, a7, 8
; RV32-NEXT: or a6, a7, a6
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: slli t0, t0, 8
; RV32-NEXT: or a7, t0, t1
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vmv.v.x v9, a4
-; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v9, v9, a5
-; RV32-NEXT: vslide1down.vx v10, v8, a3
+; RV32-NEXT: vslide1down.vx v10, v8, a2
; RV32-NEXT: vslide1down.vx v8, v9, a6
-; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v8, v8, a7
-; RV32-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV32-NEXT: vslide1down.vx v9, v10, a3
+; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_strided_unaligned:
@@ -14215,15 +14215,15 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: slli a0, a0, 8
; RV64ZVE32F-NEXT: or a0, a0, t1
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a2
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14258,15 +14258,15 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 26(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14303,15 +14303,15 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 30(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14342,21 +14342,21 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: lh a2, 26(a0)
; RV64ZVE32F-NEXT: lh a3, 28(a0)
; RV64ZVE32F-NEXT: lh a4, 30(a0)
-; RV64ZVE32F-NEXT: lh a5, 16(a0)
-; RV64ZVE32F-NEXT: lh a6, 18(a0)
-; RV64ZVE32F-NEXT: lh a7, 20(a0)
+; RV64ZVE32F-NEXT: lh a5, 20(a0)
+; RV64ZVE32F-NEXT: lh a6, 16(a0)
+; RV64ZVE32F-NEXT: lh a7, 18(a0)
; RV64ZVE32F-NEXT: lh a0, 22(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a3
-; RV64ZVE32F-NEXT: vmv.v.x v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14387,21 +14387,21 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: lh a2, 22(a0)
; RV64ZVE32F-NEXT: lh a3, 28(a0)
; RV64ZVE32F-NEXT: lh a4, 30(a0)
-; RV64ZVE32F-NEXT: lh a5, 4(a0)
-; RV64ZVE32F-NEXT: lh a6, 6(a0)
-; RV64ZVE32F-NEXT: lh a7, 12(a0)
+; RV64ZVE32F-NEXT: lh a5, 12(a0)
+; RV64ZVE32F-NEXT: lh a6, 4(a0)
+; RV64ZVE32F-NEXT: lh a7, 6(a0)
; RV64ZVE32F-NEXT: lh a0, 14(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a3
-; RV64ZVE32F-NEXT: vmv.v.x v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14437,15 +14437,15 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a5
-; RV64ZVE32F-NEXT: vmv.v.x v9, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vmv.v.x v9, a5
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14484,15 +14484,15 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a5
-; RV64ZVE32F-NEXT: vmv.v.x v9, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vmv.v.x v9, a5
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14531,15 +14531,15 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 20(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.x v8, a4
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a0
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a3
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a0
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14582,15 +14582,15 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 22(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14630,15 +14630,15 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 22(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14687,15 +14687,15 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
@@ -14735,15 +14735,15 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV64ZVE32F-NEXT: lh a0, 14(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vmv.v.x v9, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7
-; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0
-; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
+; RV64ZVE32F-NEXT: vmv.v.x v8, a5
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index b6267bf481c85..0909c11078ff4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -1340,26 +1340,26 @@ define <4 x i64> @unzip2a_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; ZVE32F-LABEL: unzip2a_dual_v4i64:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a3, 0(a2)
-; ZVE32F-NEXT: ld a2, 16(a2)
-; ZVE32F-NEXT: ld a4, 0(a1)
+; ZVE32F-NEXT: ld a3, 0(a1)
; ZVE32F-NEXT: ld a1, 16(a1)
+; ZVE32F-NEXT: ld a4, 0(a2)
+; ZVE32F-NEXT: ld a2, 16(a2)
; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
; ZVE32F-NEXT: vmv.v.i v0, 15
-; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a5, a1, 32
; ZVE32F-NEXT: srli a6, a3, 32
-; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli a7, a2, 32
; ZVE32F-NEXT: srli t0, a4, 32
; ZVE32F-NEXT: vmv.v.x v8, a4
; ZVE32F-NEXT: vmv.v.x v9, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVE32F-NEXT: vse32.v v9, (a0)
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; ZVE32F-NEXT: vse32.v v8, (a0)
; ZVE32F-NEXT: ret
;
; ZIP-LABEL: unzip2a_dual_v4i64:
@@ -1378,9 +1378,9 @@ entry:
define <16 x i64> @unzip2a_dual_v16i64(<16 x i64> %a, <16 x i64> %b) {
; V-LABEL: unzip2a_dual_v16i64:
; V: # %bb.0: # %entry
-; V-NEXT: lui a0, 5
; V-NEXT: vsetivli zero, 16, e16, m1, ta, ma
; V-NEXT: vid.v v16
+; V-NEXT: lui a0, 5
; V-NEXT: addi a0, a0, 1365
; V-NEXT: vmv.s.x v20, a0
; V-NEXT: li a0, -256
@@ -1526,26 +1526,26 @@ define <4 x i64> @unzip2a_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_ra
;
; ZVE32F-LABEL: unzip2a_dual_v4i64_exact:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a3, 0(a2)
-; ZVE32F-NEXT: ld a2, 16(a2)
-; ZVE32F-NEXT: ld a4, 0(a1)
+; ZVE32F-NEXT: ld a3, 0(a1)
; ZVE32F-NEXT: ld a1, 16(a1)
+; ZVE32F-NEXT: ld a4, 0(a2)
+; ZVE32F-NEXT: ld a2, 16(a2)
; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
; ZVE32F-NEXT: vmv.v.i v0, 15
-; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a5, a1, 32
; ZVE32F-NEXT: srli a6, a3, 32
-; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli a7, a2, 32
; ZVE32F-NEXT: srli t0, a4, 32
; ZVE32F-NEXT: vmv.v.x v8, a4
; ZVE32F-NEXT: vmv.v.x v9, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVE32F-NEXT: vs1r.v v9, (a0)
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; ZVE32F-NEXT: vs1r.v v8, (a0)
; ZVE32F-NEXT: ret
;
; ZIP-LABEL: unzip2a_dual_v4i64_exact:
@@ -1574,26 +1574,26 @@ define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscal
;
; ZVE32F-LABEL: unzip2a_dual_v4i64_exact_nf2:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a3, 0(a2)
-; ZVE32F-NEXT: ld a2, 16(a2)
-; ZVE32F-NEXT: ld a4, 0(a1)
+; ZVE32F-NEXT: ld a3, 0(a1)
; ZVE32F-NEXT: ld a1, 16(a1)
+; ZVE32F-NEXT: ld a4, 0(a2)
+; ZVE32F-NEXT: ld a2, 16(a2)
; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
; ZVE32F-NEXT: vmv.v.i v0, 15
-; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a5, a1, 32
; ZVE32F-NEXT: srli a6, a3, 32
-; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli a7, a2, 32
; ZVE32F-NEXT: srli t0, a4, 32
; ZVE32F-NEXT: vmv.v.x v8, a4
; ZVE32F-NEXT: vmv.v.x v9, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVE32F-NEXT: vse32.v v9, (a0)
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; ZVE32F-NEXT: vse32.v v8, (a0)
; ZVE32F-NEXT: ret
;
; ZIP-LABEL: unzip2a_dual_v4i64_exact_nf2:
@@ -1651,76 +1651,76 @@ define <16 x i64> @unzip2a_dual_v16i64_exact(<16 x i64> %a, <16 x i64> %b) vscal
;
; ZVE32F-LABEL: unzip2a_dual_v16i64_exact:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a6, 0(a1)
+; ZVE32F-NEXT: ld a5, 96(a2)
+; ZVE32F-NEXT: ld a7, 0(a1)
; ZVE32F-NEXT: ld a4, 16(a1)
-; ZVE32F-NEXT: ld a7, 32(a1)
+; ZVE32F-NEXT: ld t0, 32(a1)
; ZVE32F-NEXT: ld a3, 48(a1)
-; ZVE32F-NEXT: ld a5, 80(a1)
-; ZVE32F-NEXT: ld t0, 96(a1)
; ZVE32F-NEXT: ld t1, 64(a1)
+; ZVE32F-NEXT: ld a6, 80(a1)
+; ZVE32F-NEXT: ld t2, 96(a1)
; ZVE32F-NEXT: ld a1, 112(a1)
-; ZVE32F-NEXT: srli t2, a6, 32
; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
-; ZVE32F-NEXT: vmv.v.x v8, t0
+; ZVE32F-NEXT: vmv.v.x v8, a7
+; ZVE32F-NEXT: srli a7, a7, 32
+; ZVE32F-NEXT: vmv.v.x v9, t0
; ZVE32F-NEXT: srli t0, t0, 32
-; ZVE32F-NEXT: vmv.v.x v9, t1
-; ZVE32F-NEXT: srli t1, t1, 32
-; ZVE32F-NEXT: vmv.v.x v10, a6
-; ZVE32F-NEXT: vslide1down.vx v9, v9, t1
-; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
-; ZVE32F-NEXT: vslide1down.vx v10, v10, t2
-; ZVE32F-NEXT: ld t1, 32(a2)
-; ZVE32F-NEXT: ld t0, 16(a2)
-; ZVE32F-NEXT: ld t2, 0(a2)
-; ZVE32F-NEXT: ld a6, 48(a2)
-; ZVE32F-NEXT: vmv.v.x v11, t1
+; ZVE32F-NEXT: vmv.v.x v10, t1
; ZVE32F-NEXT: srli t1, t1, 32
-; ZVE32F-NEXT: vmv.v.x v12, t2
+; ZVE32F-NEXT: vmv.v.x v11, t2
; ZVE32F-NEXT: srli t2, t2, 32
-; ZVE32F-NEXT: vmv.v.x v13, a7
-; ZVE32F-NEXT: srli a7, a7, 32
-; ZVE32F-NEXT: vslide1down.vx v13, v13, a7
-; ZVE32F-NEXT: vslide1down.vx v12, v12, t2
-; ZVE32F-NEXT: vslide1down.vx v11, v11, t1
-; ZVE32F-NEXT: ld a7, 64(a2)
-; ZVE32F-NEXT: ld t1, 80(a2)
-; ZVE32F-NEXT: ld t2, 96(a2)
-; ZVE32F-NEXT: ld a2, 112(a2)
-; ZVE32F-NEXT: vmv.v.x v14, a7
-; ZVE32F-NEXT: srli a7, a7, 32
-; ZVE32F-NEXT: vslide1down.vx v14, v14, a7
-; ZVE32F-NEXT: vmv.v.x v15, t2
-; ZVE32F-NEXT: srli a7, t2, 32
-; ZVE32F-NEXT: vslide1down.vx v15, v15, a7
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
+; ZVE32F-NEXT: vslide1down.vx v11, v11, t2
+; ZVE32F-NEXT: vslide1down.vx v10, v10, t1
+; ZVE32F-NEXT: vslide1down.vx v12, v9, t0
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; ZVE32F-NEXT: ld t0, 0(a2)
+; ZVE32F-NEXT: ld t1, 16(a2)
+; ZVE32F-NEXT: ld t2, 32(a2)
+; ZVE32F-NEXT: ld a7, 48(a2)
+; ZVE32F-NEXT: vmv.v.x v9, t0
+; ZVE32F-NEXT: srli t0, t0, 32
+; ZVE32F-NEXT: vmv.v.x v13, t2
+; ZVE32F-NEXT: srli t2, t2, 32
+; ZVE32F-NEXT: vslide1down.vx v13, v13, t2
+; ZVE32F-NEXT: vslide1down.vx v14, v9, t0
+; ZVE32F-NEXT: ld t0, 64(a2)
+; ZVE32F-NEXT: ld t2, 112(a2)
+; ZVE32F-NEXT: vmv.v.x v9, a5
; ZVE32F-NEXT: srli a5, a5, 32
+; ZVE32F-NEXT: vslide1down.vx v15, v9, a5
+; ZVE32F-NEXT: ld a2, 80(a2)
+; ZVE32F-NEXT: vmv.v.x v9, t0
+; ZVE32F-NEXT: srli a5, t0, 32
; ZVE32F-NEXT: vslide1down.vx v16, v9, a5
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v9, v11, a1
; ZVE32F-NEXT: srli a1, a1, 32
-; ZVE32F-NEXT: vslide1down.vx v9, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v8, v10, a4
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
+; ZVE32F-NEXT: vslide1down.vx v10, v10, a6
+; ZVE32F-NEXT: srli a1, a6, 32
+; ZVE32F-NEXT: vslide1down.vx v10, v10, a1
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; ZVE32F-NEXT: srli a4, a4, 32
-; ZVE32F-NEXT: vslide1down.vx v10, v8, a4
-; ZVE32F-NEXT: vslide1down.vx v8, v12, t0
-; ZVE32F-NEXT: srli a1, t0, 32
-; ZVE32F-NEXT: vslide1down.vx v12, v8, a1
+; ZVE32F-NEXT: vslide1down.vx v11, v8, a4
; ZVE32F-NEXT: vmv.v.i v0, 15
; ZVE32F-NEXT: vslide1down.vx v8, v14, t1
; ZVE32F-NEXT: srli a1, t1, 32
; ZVE32F-NEXT: vslide1down.vx v14, v8, a1
-; ZVE32F-NEXT: vslidedown.vi v9, v16, 4, v0.t
-; ZVE32F-NEXT: vslide1down.vx v8, v13, a3
+; ZVE32F-NEXT: vslidedown.vi v9, v10, 4, v0.t
+; ZVE32F-NEXT: vslide1down.vx v8, v12, a3
; ZVE32F-NEXT: srli a3, a3, 32
; ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
-; ZVE32F-NEXT: vslide1down.vx v10, v11, a6
-; ZVE32F-NEXT: srli a1, a6, 32
+; ZVE32F-NEXT: vslidedown.vi v8, v11, 4, v0.t
+; ZVE32F-NEXT: vslide1down.vx v10, v13, a7
+; ZVE32F-NEXT: srli a1, a7, 32
; ZVE32F-NEXT: vslide1down.vx v10, v10, a1
-; ZVE32F-NEXT: vslidedown.vi v10, v12, 4, v0.t
-; ZVE32F-NEXT: vslide1down.vx v11, v15, a2
+; ZVE32F-NEXT: vslidedown.vi v10, v14, 4, v0.t
+; ZVE32F-NEXT: vslide1down.vx v11, v15, t2
+; ZVE32F-NEXT: srli a1, t2, 32
+; ZVE32F-NEXT: vslide1down.vx v11, v11, a1
+; ZVE32F-NEXT: vslide1down.vx v12, v16, a2
; ZVE32F-NEXT: srli a2, a2, 32
-; ZVE32F-NEXT: vslide1down.vx v11, v11, a2
-; ZVE32F-NEXT: vslidedown.vi v11, v14, 4, v0.t
+; ZVE32F-NEXT: vslide1down.vx v12, v12, a2
+; ZVE32F-NEXT: vslidedown.vi v11, v12, 4, v0.t
; ZVE32F-NEXT: vs4r.v v8, (a0)
; ZVE32F-NEXT: ret
;
@@ -1751,26 +1751,26 @@ define <4 x i64> @unzip2b_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
;
; ZVE32F-LABEL: unzip2b_dual_v4i64:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a3, 8(a2)
-; ZVE32F-NEXT: ld a2, 24(a2)
-; ZVE32F-NEXT: ld a4, 8(a1)
+; ZVE32F-NEXT: ld a3, 8(a1)
; ZVE32F-NEXT: ld a1, 24(a1)
+; ZVE32F-NEXT: ld a4, 8(a2)
+; ZVE32F-NEXT: ld a2, 24(a2)
; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
; ZVE32F-NEXT: vmv.v.i v0, 15
-; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a5, a1, 32
; ZVE32F-NEXT: srli a6, a3, 32
-; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli a7, a2, 32
; ZVE32F-NEXT: srli t0, a4, 32
; ZVE32F-NEXT: vmv.v.x v8, a4
; ZVE32F-NEXT: vmv.v.x v9, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVE32F-NEXT: vse32.v v9, (a0)
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; ZVE32F-NEXT: vse32.v v8, (a0)
; ZVE32F-NEXT: ret
;
; ZIP-LABEL: unzip2b_dual_v4i64:
@@ -1802,26 +1802,26 @@ define <4 x i64> @unzip2b_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_ra
;
; ZVE32F-LABEL: unzip2b_dual_v4i64_exact:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a3, 8(a2)
-; ZVE32F-NEXT: ld a2, 24(a2)
-; ZVE32F-NEXT: ld a4, 8(a1)
+; ZVE32F-NEXT: ld a3, 8(a1)
; ZVE32F-NEXT: ld a1, 24(a1)
+; ZVE32F-NEXT: ld a4, 8(a2)
+; ZVE32F-NEXT: ld a2, 24(a2)
; ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, mu
; ZVE32F-NEXT: vmv.v.i v0, 15
-; ZVE32F-NEXT: srli a5, a2, 32
+; ZVE32F-NEXT: srli a5, a1, 32
; ZVE32F-NEXT: srli a6, a3, 32
-; ZVE32F-NEXT: srli a7, a1, 32
+; ZVE32F-NEXT: srli a7, a2, 32
; ZVE32F-NEXT: srli t0, a4, 32
; ZVE32F-NEXT: vmv.v.x v8, a4
; ZVE32F-NEXT: vmv.v.x v9, a3
; ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; ZVE32F-NEXT: vslide1down.vx v9, v9, a6
-; ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; ZVE32F-NEXT: vslide1down.vx v9, v9, a2
+; ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; ZVE32F-NEXT: vslide1down.vx v9, v9, a5
-; ZVE32F-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVE32F-NEXT: vs1r.v v9, (a0)
+; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
+; ZVE32F-NEXT: vs1r.v v8, (a0)
; ZVE32F-NEXT: ret
;
; ZIP-LABEL: unzip2b_dual_v4i64_exact:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
index 917613d5c786f..3718156971919 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
@@ -598,43 +598,54 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; ZIP-NEXT: addi sp, sp, -16
; ZIP-NEXT: .cfi_def_cfa_offset 16
; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: slli a0, a0, 5
+; ZIP-NEXT: li a1, 40
+; ZIP-NEXT: mul a0, a0, a1
; ZIP-NEXT: sub sp, sp, a0
-; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: li a1, 24
-; ZIP-NEXT: mul a0, a0, a1
+; ZIP-NEXT: slli a0, a0, 5
; ZIP-NEXT: add a0, sp, a0
; ZIP-NEXT: addi a0, a0, 16
; ZIP-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; ZIP-NEXT: addi a0, sp, 16
; ZIP-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; ZIP-NEXT: vslidedown.vi v24, v8, 16
; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZIP-NEXT: vslidedown.vi v16, v8, 16
; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZIP-NEXT: ri.vzip2a.vv v16, v24, v0
+; ZIP-NEXT: ri.vzip2a.vv v8, v16, v0
; ZIP-NEXT: csrr a1, vlenb
-; ZIP-NEXT: li a2, 24
-; ZIP-NEXT: mul a1, a1, a2
+; ZIP-NEXT: slli a1, a1, 3
; ZIP-NEXT: add a1, sp, a1
; ZIP-NEXT: addi a1, a1, 16
-; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; ZIP-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: slli a1, a1, 5
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 16
+; ZIP-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; ZIP-NEXT: vslidedown.vi v24, v24, 16
+; ZIP-NEXT: vslidedown.vi v16, v16, 16
; ZIP-NEXT: csrr a1, vlenb
-; ZIP-NEXT: slli a1, a1, 4
+; ZIP-NEXT: li a2, 24
+; ZIP-NEXT: mul a1, a1, a2
; ZIP-NEXT: add a1, sp, a1
; ZIP-NEXT: addi a1, a1, 16
-; ZIP-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
; ZIP-NEXT: lui a1, 699051
; ZIP-NEXT: addi a1, a1, -1366
; ZIP-NEXT: vmv.s.x v0, a1
; ZIP-NEXT: csrr a1, vlenb
-; ZIP-NEXT: slli a1, a1, 3
+; ZIP-NEXT: slli a1, a1, 4
; ZIP-NEXT: add a1, sp, a1
; ZIP-NEXT: addi a1, a1, 16
-; ZIP-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: li a2, 24
+; ZIP-NEXT: mul a1, a1, a2
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 16
+; ZIP-NEXT: vl8r.v v16, (a1) # vscale x 64-byte Folded Reload
; ZIP-NEXT: csrr a1, vlenb
; ZIP-NEXT: slli a1, a1, 4
; ZIP-NEXT: add a1, sp, a1
@@ -646,19 +657,21 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; ZIP-NEXT: addi a1, a1, 16
; ZIP-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, mu
-; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24, v0.t
+; ZIP-NEXT: ri.vzip2a.vv v8, v24, v16, v0.t
+; ZIP-NEXT: vmv.v.v v24, v8
; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: li a1, 24
-; ZIP-NEXT: mul a0, a0, a1
+; ZIP-NEXT: slli a0, a0, 5
; ZIP-NEXT: add a0, sp, a0
; ZIP-NEXT: addi a0, a0, 16
-; ZIP-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZIP-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; ZIP-NEXT: addi a0, sp, 16
; ZIP-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZIP-NEXT: ri.vzip2a.vv v0, v8, v24
+; ZIP-NEXT: ri.vzip2a.vv v0, v8, v16
; ZIP-NEXT: vmv.v.v v8, v0
+; ZIP-NEXT: vmv.v.v v16, v24
; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: slli a0, a0, 5
+; ZIP-NEXT: li a1, 40
+; ZIP-NEXT: mul a0, a0, a1
; ZIP-NEXT: add sp, sp, a0
; ZIP-NEXT: .cfi_def_cfa sp, 16
; ZIP-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
index 8676803e20e3b..f5216d82c81ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
@@ -195,15 +195,15 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
; RV32-NEXT: lui a0, %hi(.LCPI12_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT: vmv.v.i v16, -1
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: lui a0, %hi(.LCPI12_1)
; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1)
; RV32-NEXT: vle16.v v21, (a0)
+; RV32-NEXT: vmv.v.i v16, -1
; RV32-NEXT: li a0, 113
; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vrgatherei16.vv v12, v16, v20
-; RV32-NEXT: vrgatherei16.vv v12, v8, v21, v0.t
+; RV32-NEXT: vrgatherei16.vv v12, v16, v21
+; RV32-NEXT: vrgatherei16.vv v12, v8, v20, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
@@ -227,12 +227,12 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) {
define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
; RV32-LABEL: vrgather_shuffle_vx_v8i64:
; RV32: # %bb.0:
-; RV32-NEXT: lui a0, %hi(.LCPI13_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0)
-; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV32-NEXT: vle16.v v16, (a0)
; RV32-NEXT: lui a0, %hi(.LCPI13_1)
; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1)
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV32-NEXT: vle16.v v16, (a0)
+; RV32-NEXT: lui a0, %hi(.LCPI13_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0)
; RV32-NEXT: vle16.v v17, (a0)
; RV32-NEXT: li a0, 140
; RV32-NEXT: vmv.s.x v0, a0
@@ -388,14 +388,13 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vrgather.vi v10, v8, 2
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v11, 4
; CHECK-NEXT: li a0, 67
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
+; CHECK-NEXT: vrgather.vi v10, v8, 2
+; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
%shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> <i32 12, i32 8, i32 2, i32 2, i32 2, i32 2, i32 8, i32 2>
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
deleted file mode 100644
index 0c058b562f53d..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=finalize-isel | FileCheck %s
-
-declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
-declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
-declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
-
-; Test result has chain output of true operand of merge.vvm.
-define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
- ; CHECK-LABEL: name: vpmerge_vpload_store
- ; CHECK: bb.0 (%ir-block.0):
- ; CHECK-NEXT: liveins: $v8, $x10, $v0, $x11
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]]
- ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 8)
- ; CHECK-NEXT: PseudoVSE32_V_M1 killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]], -1, 5 /* e32 */ :: (store (<vscale x 1 x s64>) into %ir.p)
- ; CHECK-NEXT: PseudoRET
- %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
- %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
- store <vscale x 2 x i32> %b, ptr %p
- ret void
-}
-
-define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
- ; CHECK-LABEL: name: vpselect_vpload_store
- ; CHECK: bb.0 (%ir-block.0):
- ; CHECK-NEXT: liveins: $v8, $x10, $v0, $x11
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]]
- ; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 5 /* e32 */, 1 /* ta, mu */ :: (load unknown-size from %ir.p, align 8)
- ; CHECK-NEXT: PseudoVSE32_V_M1 killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]], -1, 5 /* e32 */ :: (store (<vscale x 1 x s64>) into %ir.p)
- ; CHECK-NEXT: PseudoRET
- %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
- %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
- store <vscale x 2 x i32> %b, ptr %p
- ret void
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index ca7f2563e4fc9..e680770637a4f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -141,28 +141,26 @@ define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) {
define {<8 x i64>, <8 x i64>} @vector_deinterleave_v8i64_v16i64(<16 x i64> %vec) {
; V-LABEL: vector_deinterleave_v8i64_v16i64:
; V: # %bb.0:
-; V-NEXT: li a0, 85
; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; V-NEXT: vmv.v.i v0, -16
; V-NEXT: vid.v v16
; V-NEXT: vsetivli zero, 8, e64, m8, ta, ma
; V-NEXT: vslidedown.vi v24, v8, 8
+; V-NEXT: li a0, 85
+; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; V-NEXT: vmv.v.i v0, -16
; V-NEXT: vmv.s.x v12, a0
; V-NEXT: li a0, 170
-; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; V-NEXT: vadd.vv v20, v16, v16
-; V-NEXT: vmv.s.x v21, a0
+; V-NEXT: vadd.vv v13, v16, v16
+; V-NEXT: vmv.s.x v20, a0
+; V-NEXT: vadd.vi v21, v13, -8
; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; V-NEXT: vcompress.vm v16, v8, v12
; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; V-NEXT: vadd.vi v22, v20, -8
-; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; V-NEXT: vcompress.vm v12, v8, v21
-; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; V-NEXT: vadd.vi v8, v20, -7
+; V-NEXT: vadd.vi v22, v13, -7
; V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; V-NEXT: vrgatherei16.vv v16, v24, v22, v0.t
-; V-NEXT: vrgatherei16.vv v12, v24, v8, v0.t
+; V-NEXT: vcompress.vm v12, v8, v20
+; V-NEXT: vrgatherei16.vv v16, v24, v21, v0.t
+; V-NEXT: vrgatherei16.vv v12, v24, v22, v0.t
; V-NEXT: vmv.v.v v8, v16
; V-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
new file mode 100644
index 0000000000000..03204468dc14c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir
@@ -0,0 +1,57 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vector-peephole -verify-machineinstrs | FileCheck %s
+
+---
+name: vle32
+body: |
+ bb.0:
+ liveins: $x8, $v0, $v8
+ ; CHECK-LABEL: name: vle32
+ ; CHECK: liveins: $x8, $v0, $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %y:vrnov0 = PseudoVLE32_V_M1_MASK %passthru, $noreg, %mask, %avl, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1)
+ %avl:gprnox0 = COPY $x8
+ %passthru:vrnov0 = COPY $v8
+ %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size)
+ %mask:vmv0 = COPY $v0
+ %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */
+...
+---
+name: vle32_no_passthru
+body: |
+ bb.0:
+ liveins: $x8, $v0, $v8
+ ; CHECK-LABEL: name: vle32_no_passthru
+ ; CHECK: liveins: $x8, $v0, $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %false:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %y:vrnov0 = PseudoVLE32_V_M1_MASK %false, $noreg, %mask, %avl, 5 /* e32 */, 1 /* ta, mu */ :: (load unknown-size, align 1)
+ %avl:gprnox0 = COPY $x8
+ %false:vr = COPY $v8
+ %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size)
+ %mask:vmv0 = COPY $v0
+ %y:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %x, %mask, %avl, 5 /* e32 */
+...
+---
+name: vle32_move_past_passthru
+body: |
+ bb.0:
+ liveins: $x8, $v0, $v8
+ ; CHECK-LABEL: name: vle32_move_past_passthru
+ ; CHECK: liveins: $x8, $v0, $v8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %avl:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %y:vrnov0 = PseudoVLE32_V_M1_MASK %passthru, $noreg, %mask, %avl, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1)
+ %avl:gprnox0 = COPY $x8
+ %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size)
+ %passthru:vrnov0 = COPY $v8
+ %mask:vmv0 = COPY $v0
+ %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, %x, %mask, %avl, 5 /* e32 */
+...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index f94e46771f49c..c8b882b92b934 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1540,12 +1540,11 @@ define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b)
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vmv.v.x v12, a0
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: li a0, 1
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: li a0, 1
; RV64-NEXT: vwaddu.vx v8, v12, a0, v0.t
; RV64-NEXT: ret
%zb = zext i16 %b to i32
@@ -1615,12 +1614,11 @@ define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b)
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srai a0, a0, 48
-; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; RV64-NEXT: vmv.v.x v12, a0
-; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: li a0, 1
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, mu
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: li a0, 1
; RV64-NEXT: vwsub.vx v8, v12, a0, v0.t
; RV64-NEXT: ret
%sb = sext i16 %b to i32
>From f2f28f5d0e2f4467840ab1d8f9dd90e31c21be6e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 13 Jun 2025 17:15:59 +0100
Subject: [PATCH 2/2] Update == RISCV::NoRegister checks
---
llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 69c288c7a2e7a..8caf67ffd10b9 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -719,15 +719,13 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
// We require that either passthru and false are the same, or that passthru
// is undefined.
- if (PassthruReg != RISCV::NoRegister &&
- !isKnownSameDefs(PassthruReg, FalseReg))
+ if (PassthruReg && !isKnownSameDefs(PassthruReg, FalseReg))
return false;
// If True has a passthru operand then it needs to be the same as vmerge's
// False, since False will be used for the result's passthru operand.
Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg();
- if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) &&
- TruePassthru != RISCV::NoRegister &&
+ if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru &&
!isKnownSameDefs(TruePassthru, FalseReg))
return false;
@@ -764,7 +762,7 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
// to the tail. In that case we always need to use tail undisturbed to
// preserve them.
uint64_t Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
- if (PassthruReg == RISCV::NoRegister && RISCV::isVLKnownLE(VMergeVL, MinVL))
+ if (!PassthruReg && RISCV::isVLKnownLE(VMergeVL, MinVL))
Policy |= RISCVVType::TAIL_AGNOSTIC;
assert(RISCVII::hasVecPolicyOp(True.getDesc().TSFlags) &&
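A quick note for readers on the idiom in this second patch: RISCV::NoRegister is the zero sentinel, and a Register tests as false in a boolean context exactly when it holds that sentinel, so "!PassthruReg" and "PassthruReg == RISCV::NoRegister" are interchangeable here. The standalone sketch below uses a mock Register class purely for illustration; it is not LLVM's actual class, just the shape of the idiom the patch relies on.

// mock_register.cpp - shows why `if (Reg)` / `!Reg` can replace
// explicit comparisons against a zero NoRegister sentinel.
#include <cassert>

namespace mock {
constexpr unsigned NoRegister = 0; // zero sentinel, as in generated target register enums

class Register {
  unsigned Reg;

public:
  constexpr Register(unsigned R = 0) : Reg(R) {}
  constexpr bool isValid() const { return Reg != NoRegister; }
  // Contextual conversion: `if (Reg)`, `!Reg`, `Reg && ...` all test validity.
  constexpr explicit operator bool() const { return isValid(); }
};
} // namespace mock

int main() {
  mock::Register Passthru; // default-constructed: holds NoRegister
  mock::Register V8(75);   // some nonzero register number
  assert(!Passthru);       // !Reg  is equivalent to  Reg == NoRegister
  assert(V8);              // Reg   is equivalent to  Reg != NoRegister
  return 0;
}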