[llvm] 0c5319e - [ModuloSchedule][AArch64] Implement modulo variable expansion for pipelining (#65609)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 11 18:27:38 PDT 2024
Author: Yuta Mukai
Date: 2024-06-12T10:27:35+09:00
New Revision: 0c5319e546321d7a766999e49e0ccf801ff2b3dc
URL: https://github.com/llvm/llvm-project/commit/0c5319e546321d7a766999e49e0ccf801ff2b3dc
DIFF: https://github.com/llvm/llvm-project/commit/0c5319e546321d7a766999e49e0ccf801ff2b3dc.diff
LOG: [ModuloSchedule][AArch64] Implement modulo variable expansion for pipelining (#65609)
Modulo variable expansion is a technique that resolves overlapping
variable lifetimes by unrolling. The existing implementation resolves
the overlap by inserting move (copy) instructions, which suits
processors with ordinary registers such as Arm and x86, but it may
produce a very large number of move instructions and cause performance
problems.
Modulo variable expansion is enabled by specifying -pipeliner-mve-cg. A
backend must implement some newly defined interfaces in
PipelinerLoopInfo; they are implemented here for AArch64.
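As an illustrative sketch only (the class name and the hook bodies below
are hypothetical placeholders, not part of this patch), a backend that
wants to use the MVE expander overrides the two new PipelinerLoopInfo
hooks roughly like this:

  #include "llvm/CodeGen/TargetInstrInfo.h"
  using namespace llvm;

  class MyPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  public:
    // Opt this target in to the MVE code generator.
    bool isMVEExpanderSupported() override { return true; }

    // Emit a compare at the end of MBB that tests whether the remaining
    // trip count is greater than TC, and fill Cond with the branch
    // condition for the expander to use.
    void createRemainingIterationsGreaterCondition(
        int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
        DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override {
      // Target-specific compare/condition generation goes here; see the
      // AArch64 implementation in this patch for a concrete example.
    }

    // The remaining PipelinerLoopInfo hooks (shouldIgnoreForPipelining,
    // createTripCountGreaterCondition, adjustTripCount, setPreheader,
    // disposed) still have to be implemented as before.
  };

The new expander is then selected with the hidden option, e.g.
llc ... -pipeliner-mve-cg, as exercised by the added MIR tests.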
Discourse thread:
https://discourse.llvm.org/t/implementing-modulo-variable-expansion-for-machinepipeliner
Added:
llvm/test/CodeGen/AArch64/sms-mve1.mir
llvm/test/CodeGen/AArch64/sms-mve10.mir
llvm/test/CodeGen/AArch64/sms-mve11.mir
llvm/test/CodeGen/AArch64/sms-mve12.mir
llvm/test/CodeGen/AArch64/sms-mve2.mir
llvm/test/CodeGen/AArch64/sms-mve3.mir
llvm/test/CodeGen/AArch64/sms-mve4.mir
llvm/test/CodeGen/AArch64/sms-mve5.mir
llvm/test/CodeGen/AArch64/sms-mve6.mir
llvm/test/CodeGen/AArch64/sms-mve7.mir
llvm/test/CodeGen/AArch64/sms-mve8.mir
llvm/test/CodeGen/AArch64/sms-mve9.mir
llvm/test/CodeGen/AArch64/sms-unacceptable-loop3.mir
Modified:
llvm/include/llvm/CodeGen/ModuloSchedule.h
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/MachinePipeliner.cpp
llvm/lib/CodeGen/ModuloSchedule.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir
llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir
llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index fd424163f0d19..e9f0f089adfef 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -370,6 +370,78 @@ class PeelingModuloScheduleExpander {
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
};
+/// Expand the kernel using the modulo variable expansion (MVE) algorithm.
+/// It unrolls the kernel enough to avoid overlap of register lifetimes.
+class ModuloScheduleExpanderMVE {
+private:
+ using ValueMapTy = DenseMap<unsigned, unsigned>;
+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
+
+ ModuloSchedule &Schedule;
+ MachineFunction &MF;
+ const TargetSubtargetInfo &ST;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII = nullptr;
+ LiveIntervals &LIS;
+
+ MachineBasicBlock *OrigKernel = nullptr;
+ MachineBasicBlock *OrigPreheader = nullptr;
+ MachineBasicBlock *OrigExit = nullptr;
+ MachineBasicBlock *Check = nullptr;
+ MachineBasicBlock *Prolog = nullptr;
+ MachineBasicBlock *NewKernel = nullptr;
+ MachineBasicBlock *Epilog = nullptr;
+ MachineBasicBlock *NewPreheader = nullptr;
+ MachineBasicBlock *NewExit = nullptr;
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
+
+ /// The number of unrolls required to avoid overlap of live ranges.
+ /// NumUnroll = 1 means no unrolling.
+ int NumUnroll;
+
+ void calcNumUnroll();
+ void generatePipelinedLoop();
+ void generateProlog(SmallVectorImpl<ValueMapTy> &VRMap);
+ void generatePhi(MachineInstr *OrigMI, int UnrollNum,
+ SmallVectorImpl<ValueMapTy> &PrologVRMap,
+ SmallVectorImpl<ValueMapTy> &KernelVRMap,
+ SmallVectorImpl<ValueMapTy> &PhiVRMap);
+ void generateKernel(SmallVectorImpl<ValueMapTy> &PrologVRMap,
+ SmallVectorImpl<ValueMapTy> &KernelVRMap,
+ InstrMapTy &LastStage0Insts);
+ void generateEpilog(SmallVectorImpl<ValueMapTy> &KernelVRMap,
+ SmallVectorImpl<ValueMapTy> &EpilogVRMap,
+ InstrMapTy &LastStage0Insts);
+ void mergeRegUsesAfterPipeline(Register OrigReg, Register NewReg);
+
+ MachineInstr *cloneInstr(MachineInstr *OldMI);
+
+ void updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef);
+
+ void generateKernelPhi(Register OrigLoopVal, Register NewLoopVal,
+ unsigned UnrollNum,
+ SmallVectorImpl<ValueMapTy> &VRMapProlog,
+ SmallVectorImpl<ValueMapTy> &VRMapPhi);
+ void updateInstrUse(MachineInstr *MI, int StageNum, int PhaseNum,
+ SmallVectorImpl<ValueMapTy> &CurVRMap,
+ SmallVectorImpl<ValueMapTy> *PrevVRMap);
+
+ void insertCondBranch(MachineBasicBlock &MBB, int RequiredTC,
+ InstrMapTy &LastStage0Insts,
+ MachineBasicBlock &GreaterThan,
+ MachineBasicBlock &Otherwise);
+
+public:
+ ModuloScheduleExpanderMVE(MachineFunction &MF, ModuloSchedule &S,
+ LiveIntervals &LIS)
+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
+ TII(ST.getInstrInfo()), LIS(LIS) {}
+
+ void expand();
+ static bool canApply(MachineLoop &L);
+};
+
/// Expander that simply annotates each scheduled instruction with a post-instr
/// symbol that can be consumed by the ModuloScheduleTest pass.
///
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index d5b1df2114e9e..75cb17f357241 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -767,6 +767,26 @@ class TargetInstrInfo : public MCInstrInfo {
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
SmallVectorImpl<MachineOperand> &Cond) = 0;
+ /// Create a condition to determine if the remaining trip count for a phase
+ /// is greater than TC. Some instructions such as comparisons may be
+ /// inserted at the bottom of MBB. All instructions expanded for the
+ /// phase must be inserted in MBB before calling this function.
+ /// LastStage0Insts is the map from the original instructions scheduled at
+ /// stage#0 to the expanded instructions for the last iteration of the
+ /// kernel. LastStage0Insts is intended to obtain the instruction that
+ /// refers to the latest loop counter value.
+ ///
+ /// MBB can also be a predecessor of the prologue block. Then
+ /// LastStage0Insts must be empty and the compared value is the initial
+ /// value of the trip count.
+ virtual void createRemainingIterationsGreaterCondition(
+ int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
+ DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
+ llvm_unreachable(
+ "Target didn't implement "
+ "PipelinerLoopInfo::createRemainingIterationsGreaterCondition!");
+ }
+
/// Modify the loop such that the trip count is
/// OriginalTC + TripCountAdjust.
virtual void adjustTripCount(int TripCountAdjust) = 0;
@@ -780,6 +800,10 @@ class TargetInstrInfo : public MCInstrInfo {
/// Once this function is called, no other functions on this object are
/// valid; the loop has been removed.
virtual void disposed() = 0;
+
+ /// Return true if the target can expand pipelined schedule with modulo
+ /// variable expansion.
+ virtual bool isMVEExpanderSupported() { return false; }
};
/// Analyze loop L, which must be a single-basic-block loop, and if the
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 32f65f0d49139..6c24cfca793fc 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -192,6 +192,10 @@ static cl::opt<int>
cl::desc("Margin representing the unused percentage of "
"the register pressure limit"));
+static cl::opt<bool>
+ MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
+ cl::desc("Use the MVE code generator for software pipelining"));
+
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
@@ -677,6 +681,11 @@ void SwingSchedulerDAG::schedule() {
if (ExperimentalCodeGen && NewInstrChanges.empty()) {
PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
MSE.expand();
+ } else if (MVECodeGen && NewInstrChanges.empty() &&
+ LoopPipelinerInfo->isMVEExpanderSupported() &&
+ ModuloScheduleExpanderMVE::canApply(Loop)) {
+ ModuloScheduleExpanderMVE MSE(MF, MS, LIS);
+ MSE.expand();
} else {
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
MSE.expand();
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index b912112b16362..0aed235ec39b5 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -22,6 +22,10 @@
#define DEBUG_TYPE "pipeliner"
using namespace llvm;
+static cl::opt<bool> SwapBranchTargetsMVE(
+ "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
+ cl::desc("Swap target blocks of a conditional branch for MVE expander"));
+
void ModuloSchedule::print(raw_ostream &OS) {
for (MachineInstr *MI : ScheduledInstrs)
OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
@@ -2097,6 +2101,642 @@ void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
MSE.cleanup();
}
+MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+
+ // TODO: Offset information needs to be corrected.
+ NewMI->dropMemRefs(MF);
+
+ return NewMI;
+}
+
+/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
+/// If it is already a dedicated exit, return it. Otherwise, insert a new
+/// block between them and return the new block.
+static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
+ MachineBasicBlock *Exit) {
+ if (Exit->pred_size() == 1)
+ return Exit;
+
+ MachineFunction *MF = Loop->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ MachineBasicBlock *NewExit =
+ MF->CreateMachineBasicBlock(Loop->getBasicBlock());
+ MF->insert(Loop->getIterator(), NewExit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ TII->analyzeBranch(*Loop, TBB, FBB, Cond);
+ if (TBB == Loop)
+ FBB = NewExit;
+ else if (FBB == Loop)
+ TBB = NewExit;
+ else
+ llvm_unreachable("unexpected loop structure");
+ TII->removeBranch(*Loop);
+ TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
+ Loop->replaceSuccessor(Exit, NewExit);
+ TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
+ NewExit->addSuccessor(Exit);
+
+ Exit->replacePhiUsesWith(Loop, NewExit);
+
+ return NewExit;
+}
+
+/// Insert branch code into the end of MBB. It branches to GreaterThan if the
+/// remaining trip count for instructions in LastStage0Insts is greater than
+/// RequiredTC, and to Otherwise otherwise.
+void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
+ int RequiredTC,
+ InstrMapTy &LastStage0Insts,
+ MachineBasicBlock &GreaterThan,
+ MachineBasicBlock &Otherwise) {
+ SmallVector<MachineOperand, 4> Cond;
+ LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
+ LastStage0Insts);
+
+ if (SwapBranchTargetsMVE) {
+ // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
+ // FBB for optimal performance.
+ if (TII->reverseBranchCondition(Cond))
+ llvm_unreachable("can not reverse branch condition");
+ TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
+ } else {
+ TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
+ }
+}
+
+/// Generate a pipelined loop that is unrolled by the MVE algorithm, along
+/// with any other necessary blocks. The control flow is modified to execute the
+/// pipelined loop if the trip count satisfies the condition, otherwise the
+/// original loop. The original loop is also used to execute the remainder
+/// iterations which occur due to unrolling.
+void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
+ // The control flow for pipelining with MVE:
+ //
+ // OrigPreheader:
+ // // The block that is originally the loop preheader
+ // goto Check
+ //
+ // Check:
+ // // Check whether the trip count satisfies the requirements to pipeline.
+ // if (LoopCounter > NumStages + NumUnroll - 2)
+ // // The minimum number of iterations to pipeline =
+ // // iterations executed in prolog/epilog (NumStages-1) +
+ // // iterations executed in one kernel run (NumUnroll)
+ // goto Prolog
+ // // fallback to the original loop
+ // goto NewPreheader
+ //
+ // Prolog:
+ // // All prolog stages. There are no direct branches to the epilogue.
+ // goto NewKernel
+ //
+ // NewKernel:
+ // // NumUnroll copies of the kernel
+ // if (LoopCounter > MVE-1)
+ // goto NewKernel
+ // goto Epilog
+ //
+ // Epilog:
+ // // All epilog stages.
+ // if (LoopCounter > 0)
+ // // The remainder is executed in the original loop
+ // goto NewPreheader
+ // goto NewExit
+ //
+ // NewPreheader:
+ // // Newly created preheader for the original loop.
+ // // The initial values of the phis in the loop are merged from two paths.
+ // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
+ // goto OrigKernel
+ //
+ // OrigKernel:
+ // // The original loop block.
+ // if (LoopCounter != 0)
+ // goto OrigKernel
+ // goto NewExit
+ //
+ // NewExit:
+ // // Newly created dedicated exit for the original loop.
+ // // Merge values which are referenced after the loop
+ // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
+ // goto OrigExit
+ //
+ // OrigExit:
+ // // The block that is originally the loop exit.
+ // // If it is already a dedicated exit, NewExit is not created.
+
+ // An example of where each stage is executed:
+ // Assume #Stages 3, #MVE 4, #Iterations 12
+ // Iter 0 1 2 3 4 5 6 7 8 9 10-11
+ // -------------------------------------------------
+ // Stage 0 Prolog#0
+ // Stage 1 0 Prolog#1
+ // Stage 2 1 0 Kernel Unroll#0 Iter#0
+ // Stage 2 1 0 Kernel Unroll#1 Iter#0
+ // Stage 2 1 0 Kernel Unroll#2 Iter#0
+ // Stage 2 1 0 Kernel Unroll#3 Iter#0
+ // Stage 2 1 0 Kernel Unroll#0 Iter#1
+ // Stage 2 1 0 Kernel Unroll#1 Iter#1
+ // Stage 2 1 0 Kernel Unroll#2 Iter#1
+ // Stage 2 1 0 Kernel Unroll#3 Iter#1
+ // Stage 2 1 Epilog#0
+ // Stage 2 Epilog#1
+ // Stage 0-2 OrigKernel
+
+ LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
+ assert(LoopInfo && "Must be able to analyze loop!");
+
+ calcNumUnroll();
+
+ Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+ NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
+
+ MF.insert(OrigKernel->getIterator(), Check);
+ MF.insert(OrigKernel->getIterator(), Prolog);
+ MF.insert(OrigKernel->getIterator(), NewKernel);
+ MF.insert(OrigKernel->getIterator(), Epilog);
+ MF.insert(OrigKernel->getIterator(), NewPreheader);
+
+ NewExit = createDedicatedExit(OrigKernel, OrigExit);
+
+ NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
+ TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
+
+ OrigPreheader->addSuccessor(Check);
+ TII->removeBranch(*OrigPreheader);
+ TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
+
+ Check->addSuccessor(Prolog);
+ Check->addSuccessor(NewPreheader);
+
+ Prolog->addSuccessor(NewKernel);
+
+ NewKernel->addSuccessor(NewKernel);
+ NewKernel->addSuccessor(Epilog);
+
+ Epilog->addSuccessor(NewPreheader);
+ Epilog->addSuccessor(NewExit);
+
+ InstrMapTy LastStage0Insts;
+ insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
+ LastStage0Insts, *Prolog, *NewPreheader);
+
+ // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
+ // register#
+ SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
+ generateProlog(PrologVRMap);
+ generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
+ generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
+}
+
+/// Replace MI's use operands according to the maps.
+void ModuloScheduleExpanderMVE::updateInstrUse(
+ MachineInstr *MI, int StageNum, int PhaseNum,
+ SmallVectorImpl<ValueMapTy> &CurVRMap,
+ SmallVectorImpl<ValueMapTy> *PrevVRMap) {
+ // If MI is in the prolog/kernel/epilog block, CurVRMap is
+ // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
+ // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
+ // Refer to the appropriate map according to the stage difference between
+ // MI and the definition of an operand.
+
+ for (MachineOperand &UseMO : MI->uses()) {
+ if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
+ continue;
+ int DiffStage = 0;
+ Register OrigReg = UseMO.getReg();
+ MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
+ if (!DefInst || DefInst->getParent() != OrigKernel)
+ continue;
+ unsigned InitReg = 0;
+ unsigned DefReg = OrigReg;
+ if (DefInst->isPHI()) {
+ ++DiffStage;
+ unsigned LoopReg;
+ getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
+ // LoopReg is guaranteed to be defined within the loop by canApply()
+ DefReg = LoopReg;
+ DefInst = MRI.getVRegDef(LoopReg);
+ }
+ unsigned DefStageNum = Schedule.getStage(DefInst);
+ DiffStage += StageNum - DefStageNum;
+ Register NewReg;
+ if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
+ // NewReg is defined in a previous phase of the same block
+ NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
+ else if (!PrevVRMap)
+ // Since this is the first iteration, refer to the initial register of the
+ // loop
+ NewReg = InitReg;
+ else
+ // Cases where DiffStage is larger than PhaseNum.
+ // If MI is in the kernel block, the value is defined by the previous
+ // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
+ // value is defined in the kernel block and KernelVRMap is referenced.
+ NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
+
+ const TargetRegisterClass *NRC =
+ MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
+ if (NRC)
+ UseMO.setReg(NewReg);
+ else {
+ Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
+ BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ SplitReg)
+ .addReg(NewReg);
+ UseMO.setReg(SplitReg);
+ }
+ }
+}
+
+/// Return the phi that references Reg as its loop value, if any.
+/// canApply() guarantees that at most one such phi exists.
+static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
+ for (MachineInstr &Phi : Loop->phis()) {
+ unsigned InitVal, LoopVal;
+ getPhiRegs(Phi, Loop, InitVal, LoopVal);
+ if (LoopVal == Reg)
+ return &Phi;
+ }
+ return nullptr;
+}
+
+/// Generate phis for registers defined by OrigMI.
+void ModuloScheduleExpanderMVE::generatePhi(
+ MachineInstr *OrigMI, int UnrollNum,
+ SmallVectorImpl<ValueMapTy> &PrologVRMap,
+ SmallVectorImpl<ValueMapTy> &KernelVRMap,
+ SmallVectorImpl<ValueMapTy> &PhiVRMap) {
+ int StageNum = Schedule.getStage(OrigMI);
+ bool UsePrologReg;
+ if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
+ UsePrologReg = true;
+ else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
+ UsePrologReg = false;
+ else
+ return;
+
+ // Examples that show which stages are merged by phi.
+ // Meaning of the symbol following the stage number:
+ // a/b: Stages with the same letter are merged (UsePrologReg == true)
+ // +: Merged with the initial value (UsePrologReg == false)
+ // *: No phis required
+ //
+ // #Stages 3, #MVE 4
+ // Iter 0 1 2 3 4 5 6 7 8
+ // -----------------------------------------
+ // Stage 0a Prolog#0
+ // Stage 1a 0b Prolog#1
+ // Stage 2* 1* 0* Kernel Unroll#0
+ // Stage 2* 1* 0+ Kernel Unroll#1
+ // Stage 2* 1+ 0a Kernel Unroll#2
+ // Stage 2+ 1a 0b Kernel Unroll#3
+ //
+ // #Stages 3, #MVE 2
+ // Iter 0 1 2 3 4 5 6 7 8
+ // -----------------------------------------
+ // Stage 0a Prolog#0
+ // Stage 1a 0b Prolog#1
+ // Stage 2* 1+ 0a Kernel Unroll#0
+ // Stage 2+ 1a 0b Kernel Unroll#1
+ //
+ // #Stages 3, #MVE 1
+ // Iter 0 1 2 3 4 5 6 7 8
+ // -----------------------------------------
+ // Stage 0* Prolog#0
+ // Stage 1a 0b Prolog#1
+ // Stage 2+ 1a 0b Kernel Unroll#0
+
+ for (MachineOperand &DefMO : OrigMI->defs()) {
+ if (!DefMO.isReg() || DefMO.isDead())
+ continue;
+ Register OrigReg = DefMO.getReg();
+ auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
+ if (NewReg == KernelVRMap[UnrollNum].end())
+ continue;
+ Register CorrespondReg;
+ if (UsePrologReg) {
+ int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
+ CorrespondReg = PrologVRMap[PrologNum][OrigReg];
+ } else {
+ MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
+ if (!Phi)
+ continue;
+ CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
+ }
+
+ assert(CorrespondReg.isValid());
+ Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
+ BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), PhiReg)
+ .addReg(NewReg->second)
+ .addMBB(NewKernel)
+ .addReg(CorrespondReg)
+ .addMBB(Prolog);
+ PhiVRMap[UnrollNum][OrigReg] = PhiReg;
+ }
+}
+
+static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
+ MachineBasicBlock *NewMBB) {
+ for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
+ if (Phi.getOperand(Idx).getReg() == OrigReg) {
+ Phi.getOperand(Idx).setReg(NewReg);
+ Phi.getOperand(Idx + 1).setMBB(NewMBB);
+ return;
+ }
+ }
+}
+
+/// Generate phis that merge values from multiple routes
+void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
+ Register NewReg) {
+ SmallVector<MachineOperand *> UsesAfterLoop;
+ SmallVector<MachineInstr *> LoopPhis;
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
+ E = MRI.use_end();
+ I != E; ++I) {
+ MachineOperand &O = *I;
+ if (O.getParent()->getParent() != OrigKernel &&
+ O.getParent()->getParent() != Prolog &&
+ O.getParent()->getParent() != NewKernel &&
+ O.getParent()->getParent() != Epilog)
+ UsesAfterLoop.push_back(&O);
+ if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
+ LoopPhis.push_back(O.getParent());
+ }
+
+ // Merge the route that only executes the pipelined loop (when there are no
+ // remaining iterations) with the route that executes the original loop.
+ if (!UsesAfterLoop.empty()) {
+ Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
+ BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), PhiReg)
+ .addReg(OrigReg)
+ .addMBB(OrigKernel)
+ .addReg(NewReg)
+ .addMBB(Epilog);
+
+ for (MachineOperand *MO : UsesAfterLoop)
+ MO->setReg(PhiReg);
+
+ if (!LIS.hasInterval(PhiReg))
+ LIS.createEmptyInterval(PhiReg);
+ }
+
+ // Merge routes from the pipelined loop and the bypassed route before the
+ // original loop
+ if (!LoopPhis.empty()) {
+ for (MachineInstr *Phi : LoopPhis) {
+ unsigned InitReg, LoopReg;
+ getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
+ Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
+ BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
+ TII->get(TargetOpcode::PHI), NewInit)
+ .addReg(InitReg)
+ .addMBB(Check)
+ .addReg(NewReg)
+ .addMBB(Epilog);
+ replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
+ }
+ }
+}
+
+void ModuloScheduleExpanderMVE::generateProlog(
+ SmallVectorImpl<ValueMapTy> &PrologVRMap) {
+ PrologVRMap.clear();
+ PrologVRMap.resize(Schedule.getNumStages() - 1);
+ DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
+ for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
+ ++PrologNum) {
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ if (StageNum > PrologNum)
+ continue;
+ MachineInstr *NewMI = cloneInstr(MI);
+ updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
+ NewMIMap[NewMI] = {PrologNum, StageNum};
+ Prolog->push_back(NewMI);
+ }
+ }
+
+ for (auto I : NewMIMap) {
+ MachineInstr *MI = I.first;
+ int PrologNum = I.second.first;
+ int StageNum = I.second.second;
+ updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "prolog:\n";
+ Prolog->dump();
+ });
+}
+
+void ModuloScheduleExpanderMVE::generateKernel(
+ SmallVectorImpl<ValueMapTy> &PrologVRMap,
+ SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
+ KernelVRMap.clear();
+ KernelVRMap.resize(NumUnroll);
+ SmallVector<ValueMapTy> PhiVRMap;
+ PhiVRMap.resize(NumUnroll);
+ DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
+ DenseMap<MachineInstr *, MachineInstr *> MIMapLastStage0;
+ for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ MachineInstr *NewMI = cloneInstr(MI);
+ if (UnrollNum == NumUnroll - 1)
+ LastStage0Insts[MI] = NewMI;
+ updateInstrDef(NewMI, KernelVRMap[UnrollNum],
+ (UnrollNum == NumUnroll - 1 && StageNum == 0));
+ generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
+ NewMIMap[NewMI] = {UnrollNum, StageNum};
+ NewKernel->push_back(NewMI);
+ }
+ }
+
+ for (auto I : NewMIMap) {
+ MachineInstr *MI = I.first;
+ int UnrollNum = I.second.first;
+ int StageNum = I.second.second;
+ updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
+ }
+
+ // If remaining trip count is greater than NumUnroll-1, loop continues
+ insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
+ *Epilog);
+
+ LLVM_DEBUG({
+ dbgs() << "kernel:\n";
+ NewKernel->dump();
+ });
+}
+
+void ModuloScheduleExpanderMVE::generateEpilog(
+ SmallVectorImpl<ValueMapTy> &KernelVRMap,
+ SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
+ EpilogVRMap.clear();
+ EpilogVRMap.resize(Schedule.getNumStages() - 1);
+ DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
+ for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
+ ++EpilogNum) {
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ if (StageNum <= EpilogNum)
+ continue;
+ MachineInstr *NewMI = cloneInstr(MI);
+ updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
+ NewMIMap[NewMI] = {EpilogNum, StageNum};
+ Epilog->push_back(NewMI);
+ }
+ }
+
+ for (auto I : NewMIMap) {
+ MachineInstr *MI = I.first;
+ int EpilogNum = I.second.first;
+ int StageNum = I.second.second;
+ updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
+ }
+
+ // If there are remaining iterations, they are executed in the original loop.
+ // Instructions related to loop control, such as loop counter comparison,
+ // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
+ // in stage 0. Thus, the map is for the last one in the kernel.
+ insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
+
+ LLVM_DEBUG({
+ dbgs() << "epilog:\n";
+ Epilog->dump();
+ });
+}
+
+/// Calculate the number of unrolls required and set it to NumUnroll
+void ModuloScheduleExpanderMVE::calcNumUnroll() {
+ DenseMap<MachineInstr *, unsigned> Inst2Idx;
+ NumUnroll = 1;
+ for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
+ Inst2Idx[Schedule.getInstructions()[I]] = I;
+
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ int StageNum = Schedule.getStage(MI);
+ for (const MachineOperand &MO : MI->uses()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
+ if (DefMI->getParent() != OrigKernel)
+ continue;
+
+ int NumUnrollLocal = 1;
+ if (DefMI->isPHI()) {
+ ++NumUnrollLocal;
+ // canApply() guarantees that DefMI is not a phi and is an instruction in
+ // the loop
+ DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
+ }
+ NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
+ if (Inst2Idx[MI] <= Inst2Idx[DefMI])
+ --NumUnrollLocal;
+ NumUnroll = std::max(NumUnroll, NumUnrollLocal);
+ }
+ }
+ LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
+}
+
+/// Create new virtual registers for definitions of NewMI and update NewMI.
+/// If the definitions are referenced after the pipelined loop, phis are
+/// created to merge with other routes.
+void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
+ ValueMapTy &VRMap,
+ bool LastDef) {
+ for (MachineOperand &MO : NewMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ Register NewReg = MRI.createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ VRMap[Reg] = NewReg;
+ if (LastDef)
+ mergeRegUsesAfterPipeline(Reg, NewReg);
+ }
+}
+
+void ModuloScheduleExpanderMVE::expand() {
+ OrigKernel = Schedule.getLoop()->getTopBlock();
+ OrigPreheader = Schedule.getLoop()->getLoopPreheader();
+ OrigExit = Schedule.getLoop()->getExitBlock();
+
+ LLVM_DEBUG(Schedule.dump());
+
+ generatePipelinedLoop();
+}
+
+/// Check if ModuloScheduleExpanderMVE can be applied to L
+bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
+ if (!L.getExitBlock()) {
+ LLVM_DEBUG(
+ dbgs() << "Can not apply MVE expander: No single exit block.\n";);
+ return false;
+ }
+
+ MachineBasicBlock *BB = L.getTopBlock();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ // Put some constraints on the operands of the phis to simplify the
+ // transformation
+ DenseSet<unsigned> UsedByPhi;
+ for (MachineInstr &MI : BB->phis()) {
+ // Registers defined by phis must be used only inside the loop and never
+ // be used by phis.
+ for (MachineOperand &MO : MI.defs())
+ if (MO.isReg())
+ for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
+ if (Ref.getParent() != BB || Ref.isPHI()) {
+ LLVM_DEBUG(dbgs()
+ << "Can not apply MVE expander: A phi result is "
+ "referenced outside of the loop or by phi.\n";);
+ return false;
+ }
+
+ // A source register from the loop block must be defined inside the loop.
+ // A register defined inside the loop must be referenced by at most one
+ // phi.
+ unsigned InitVal, LoopVal;
+ getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
+ if (!Register(LoopVal).isVirtual() ||
+ MRI.getVRegDef(LoopVal)->getParent() != BB) {
+ LLVM_DEBUG(
+ dbgs() << "Can not apply MVE expander: A phi source value coming "
+ "from the loop is not defined in the loop.\n";);
+ return false;
+ }
+ if (UsedByPhi.count(LoopVal)) {
+ LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
+ "loop is referenced by two or more phis.\n";);
+ return false;
+ }
+ UsedByPhi.insert(LoopVal);
+ }
+
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// ModuloScheduleTestPass implementation
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 7d540efe2b41e..a5135b78bded9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -17,6 +17,7 @@
#include "AArch64PointerAuth.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
@@ -9582,18 +9583,49 @@ AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
namespace {
class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
- MachineInstr *PredBranch;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo &MRI;
+
+ /// The block of the loop
+ MachineBasicBlock *LoopBB;
+ /// The conditional branch of the loop
+ MachineInstr *CondBranch;
+ /// The compare instruction for loop control
+ MachineInstr *Comp;
+ /// The number of the operand of the loop counter value in Comp
+ unsigned CompCounterOprNum;
+ /// The instruction that updates the loop counter value
+ MachineInstr *Update;
+ /// The number of the operand of the loop counter value in Update
+ unsigned UpdateCounterOprNum;
+ /// The initial value of the loop counter
+ Register Init;
+ /// True iff Update is a predecessor of Comp
+ bool IsUpdatePriorComp;
+
+ /// The normalized condition used by createTripCountGreaterCondition()
SmallVector<MachineOperand, 4> Cond;
public:
- AArch64PipelinerLoopInfo(MachineInstr *PredBranch,
+ AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
+ MachineInstr *Comp, unsigned CompCounterOprNum,
+ MachineInstr *Update, unsigned UpdateCounterOprNum,
+ Register Init, bool IsUpdatePriorComp,
const SmallVectorImpl<MachineOperand> &Cond)
- : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {}
+ : MF(Comp->getParent()->getParent()),
+ TII(MF->getSubtarget().getInstrInfo()),
+ TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
+ LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
+ CompCounterOprNum(CompCounterOprNum), Update(Update),
+ UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
+ IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
// Make the instructions for loop control be placed in stage 0.
- // The predecessors of PredBranch are considered by the caller.
- return MI == PredBranch;
+ // The predecessors of Comp are considered by the caller.
+ return MI == Comp;
}
std::optional<bool> createTripCountGreaterCondition(
@@ -9606,31 +9638,277 @@ class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
return {};
}
+ void createRemainingIterationsGreaterCondition(
+ int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
+ DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
+
void setPreheader(MachineBasicBlock *NewPreheader) override {}
void adjustTripCount(int TripCountAdjust) override {}
void disposed() override {}
+ bool isMVEExpanderSupported() override { return true; }
};
} // namespace
-static bool isCompareAndBranch(unsigned Opcode) {
- switch (Opcode) {
- case AArch64::CBZW:
- case AArch64::CBZX:
- case AArch64::CBNZW:
- case AArch64::CBNZX:
- case AArch64::TBZW:
- case AArch64::TBZX:
- case AArch64::TBNZW:
- case AArch64::TBNZX:
- return true;
+/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
+/// is replaced by ReplaceReg. The output register is newly created.
+/// The other operands are unchanged from MI.
+static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
+ Register ReplaceReg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertTo) {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
+ Register Result = 0;
+ for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
+ if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
+ Result = MRI.createVirtualRegister(
+ MRI.getRegClass(NewMI->getOperand(0).getReg()));
+ NewMI->getOperand(I).setReg(Result);
+ } else if (I == ReplaceOprNum) {
+ MRI.constrainRegClass(
+ ReplaceReg,
+ TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
+ NewMI->getOperand(I).setReg(ReplaceReg);
+ }
}
- return false;
+ MBB.insert(InsertTo, NewMI);
+ return Result;
+}
+
+void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
+ int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
+ DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
+ // Create and accumulate conditions for next TC iterations.
+ // Example:
+ // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
+ // # iteration of the kernel
+ //
+ // # insert the following instructions
+ // cond = CSINCXr 0, 0, C, implicit $nzcv
+ // counter = ADDXri counter, 1 # clone from this->Update
+ // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
+ // cond = CSINCXr cond, cond, C, implicit $nzcv
+ // ... (repeat TC times)
+ // SUBSXri cond, 0, implicit-def $nzcv
+
+ assert(CondBranch->getOpcode() == AArch64::Bcc);
+ // CondCode to exit the loop
+ AArch64CC::CondCode CC =
+ (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
+ if (CondBranch->getOperand(1).getMBB() == LoopBB)
+ CC = AArch64CC::getInvertedCondCode(CC);
+
+ // Accumulate conditions to exit the loop
+ Register AccCond = AArch64::XZR;
+
+ // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
+ auto AccumulateCond = [&](Register CurCond,
+ AArch64CC::CondCode CC) -> Register {
+ Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
+ BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
+ .addReg(NewCond, RegState::Define)
+ .addReg(CurCond)
+ .addReg(CurCond)
+ .addImm(AArch64CC::getInvertedCondCode(CC));
+ return NewCond;
+ };
+
+ if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
+ // Update and Comp for I==0 already exist in MBB
+ // (MBB is an unrolled kernel)
+ Register Counter;
+ for (int I = 0; I <= TC; ++I) {
+ Register NextCounter;
+ if (I != 0)
+ NextCounter =
+ cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
+
+ AccCond = AccumulateCond(AccCond, CC);
+
+ if (I != TC) {
+ if (I == 0) {
+ if (Update != Comp && IsUpdatePriorComp) {
+ Counter =
+ LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
+ NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
+ MBB.end());
+ } else {
+ // can use already calculated value
+ NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
+ }
+ } else if (Update != Comp) {
+ NextCounter =
+ cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
+ }
+ }
+ Counter = NextCounter;
+ }
+ } else {
+ Register Counter;
+ if (LastStage0Insts.empty()) {
+ // use initial counter value (testing if the trip count is sufficient to
+ // be executed by pipelined code)
+ Counter = Init;
+ if (IsUpdatePriorComp)
+ Counter =
+ cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
+ } else {
+ // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
+ Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
+ }
+
+ for (int I = 0; I <= TC; ++I) {
+ Register NextCounter;
+ NextCounter =
+ cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
+ AccCond = AccumulateCond(AccCond, CC);
+ if (I != TC && Update != Comp)
+ NextCounter =
+ cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
+ Counter = NextCounter;
+ }
+ }
+
+ // If AccCond == 0, the remainder is greater than TC.
+ BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
+ .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
+ .addReg(AccCond)
+ .addImm(0)
+ .addImm(0);
+ Cond.clear();
+ Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
+}
+
+static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
+ Register &RegMBB, Register &RegOther) {
+ assert(Phi.getNumOperands() == 5);
+ if (Phi.getOperand(2).getMBB() == MBB) {
+ RegMBB = Phi.getOperand(1).getReg();
+ RegOther = Phi.getOperand(3).getReg();
+ } else {
+ assert(Phi.getOperand(4).getMBB() == MBB);
+ RegMBB = Phi.getOperand(3).getReg();
+ RegOther = Phi.getOperand(1).getReg();
+ }
+}
+
+static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
+ if (!Reg.isVirtual())
+ return false;
+ const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ return MRI.getVRegDef(Reg)->getParent() != BB;
+}
+
+/// If Reg is an induction variable, return true and set some parameters
+static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
+ MachineInstr *&UpdateInst,
+ unsigned &UpdateCounterOprNum, Register &InitReg,
+ bool &IsUpdatePriorComp) {
+ // Example:
+ //
+ // Preheader:
+ // InitReg = ...
+ // LoopBB:
+ // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
+ // Reg = COPY Reg0 ; COPY is ignored.
+ // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
+ // ; Reg is the value calculated in the previous
+ // ; iteration, so IsUpdatePriorComp == false.
+
+ if (LoopBB->pred_size() != 2)
+ return false;
+ if (!Reg.isVirtual())
+ return false;
+ const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
+ UpdateInst = nullptr;
+ UpdateCounterOprNum = 0;
+ InitReg = 0;
+ IsUpdatePriorComp = true;
+ Register CurReg = Reg;
+ while (true) {
+ MachineInstr *Def = MRI.getVRegDef(CurReg);
+ if (Def->getParent() != LoopBB)
+ return false;
+ if (Def->isCopy()) {
+ // Ignore copy instructions unless they contain subregisters
+ if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
+ return false;
+ CurReg = Def->getOperand(1).getReg();
+ } else if (Def->isPHI()) {
+ if (InitReg != 0)
+ return false;
+ if (!UpdateInst)
+ IsUpdatePriorComp = false;
+ extractPhiReg(*Def, LoopBB, CurReg, InitReg);
+ } else {
+ if (UpdateInst)
+ return false;
+ switch (Def->getOpcode()) {
+ case AArch64::ADDSXri:
+ case AArch64::ADDSWri:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSWri:
+ case AArch64::ADDXri:
+ case AArch64::ADDWri:
+ case AArch64::SUBXri:
+ case AArch64::SUBWri:
+ UpdateInst = Def;
+ UpdateCounterOprNum = 1;
+ break;
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::ADDXrr:
+ case AArch64::ADDWrr:
+ case AArch64::SUBXrr:
+ case AArch64::SUBWrr:
+ UpdateInst = Def;
+ if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
+ UpdateCounterOprNum = 1;
+ else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
+ UpdateCounterOprNum = 2;
+ else
+ return false;
+ break;
+ default:
+ return false;
+ }
+ CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
+ }
+
+ if (!CurReg.isVirtual())
+ return false;
+ if (Reg == CurReg)
+ break;
+ }
+
+ if (!UpdateInst)
+ return false;
+
+ return true;
}
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
+ // Accept loops that meet the following conditions
+ // * The conditional branch is BCC
+ // * The compare instruction is ADDS/SUBS/WHILEXX
+ // * One operand of the compare is an induction variable and the other is a
+ // loop invariant value
+ // * The induction variable is incremented/decremented by a single instruction
+ // * Does not contain CALL or instructions which have unmodeled side effects
+
+ for (MachineInstr &MI : *LoopBB)
+ if (MI.isCall() || MI.hasUnmodeledSideEffects())
+ // This instruction may use NZCV, which interferes with the instruction to
+ // be inserted for loop control.
+ return nullptr;
+
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
@@ -9641,48 +9919,76 @@ AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
return nullptr;
// Must be conditional branch
- if (FBB == nullptr)
+ if (TBB != LoopBB && FBB == nullptr)
return nullptr;
assert((TBB == LoopBB || FBB == LoopBB) &&
"The Loop must be a single-basic-block loop");
+ MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+
+ if (CondBranch->getOpcode() != AArch64::Bcc)
+ return nullptr;
+
// Normalization for createTripCountGreaterCondition()
if (TBB == LoopBB)
reverseBranchCondition(Cond);
- MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
- const TargetRegisterInfo &TRI = getRegisterInfo();
-
- // Find the immediate predecessor of the conditional branch
- MachineInstr *PredBranch = nullptr;
- if (CondBranch->getOpcode() == AArch64::Bcc) {
- for (MachineInstr &MI : reverse(*LoopBB)) {
- if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
- PredBranch = &MI;
+ MachineInstr *Comp = nullptr;
+ unsigned CompCounterOprNum = 0;
+ for (MachineInstr &MI : reverse(*LoopBB)) {
+ if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
+ // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
+ // operands is a loop invariant value
+
+ switch (MI.getOpcode()) {
+ case AArch64::SUBSXri:
+ case AArch64::SUBSWri:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSWri:
+ Comp = &MI;
+ CompCounterOprNum = 1;
break;
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ Comp = &MI;
+ break;
+ default:
+ if (isWhileOpcode(MI.getOpcode())) {
+ Comp = &MI;
+ break;
+ }
+ return nullptr;
}
- }
- if (!PredBranch)
- return nullptr;
- } else if (isCompareAndBranch(CondBranch->getOpcode())) {
- const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
- Register Reg = CondBranch->getOperand(0).getReg();
- if (!Reg.isVirtual())
- return nullptr;
- PredBranch = MRI.getVRegDef(Reg);
- // MachinePipeliner does not expect that the immediate predecessor is a Phi
- if (PredBranch->isPHI())
- return nullptr;
+ if (CompCounterOprNum == 0) {
+ if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
+ CompCounterOprNum = 2;
+ else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
+ CompCounterOprNum = 1;
+ else
+ return nullptr;
+ }
+ break;
+ }
+ }
+ if (!Comp)
+ return nullptr;
- if (PredBranch->getParent() != LoopBB)
- return nullptr;
- } else {
+ MachineInstr *Update = nullptr;
+ Register Init;
+ bool IsUpdatePriorComp;
+ unsigned UpdateCounterOprNum;
+ if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
+ Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
return nullptr;
- }
- return std::make_unique<AArch64PipelinerLoopInfo>(PredBranch, Cond);
+ return std::make_unique<AArch64PipelinerLoopInfo>(
+ LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
+ Init, IsUpdatePriorComp, Cond);
}
#define GET_INSTRINFO_HELPERS
diff --git a/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir b/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir
index 94dd299d1caa7..630a89364c8c9 100644
--- a/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir
+++ b/llvm/test/CodeGen/AArch64/sms-acceptable-loop3.mir
@@ -1,8 +1,11 @@
# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-enable-copytophi=0 -debug-only=pipeliner 2>&1 | FileCheck %s
# REQUIRES: asserts
-# An acceptable loop by pipeliner: TBB == ExitBB, FBB == LoopBB, Compare and branch
-# CHECK: Schedule Found? 1
+# Compare and branch instructions are not supported now.
+# CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+
+# (if supported) An acceptable loop by pipeliner: TBB == ExitBB, FBB == LoopBB, Compare and branch
+# CHECK(if supported): Schedule Found? 1
--- |
define dso_local void @func(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr #0 {
diff --git a/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir b/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir
index fbd74a777aa1e..cb0b1e3028753 100644
--- a/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir
+++ b/llvm/test/CodeGen/AArch64/sms-acceptable-loop4.mir
@@ -1,8 +1,11 @@
# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-enable-copytophi=0 -debug-only=pipeliner 2>&1 | FileCheck %s
# REQUIRES: asserts
-# An acceptable loop by pipeliner TBB == LoopBB, FBB == ExitBB, Compare and branch
-# CHECK: Schedule Found? 1
+# Compare and branch instructions are not supported now.
+# CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+
+# (if supported) An acceptable loop by pipeliner TBB == LoopBB, FBB == ExitBB, Compare and branch
+# CHECK(if supported): Schedule Found? 1
--- |
define dso_local void @func(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr #0 {
diff --git a/llvm/test/CodeGen/AArch64/sms-mve1.mir b/llvm/test/CodeGen/AArch64/sms-mve1.mir
new file mode 100644
index 0000000000000..c7f187c807ead
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve1.mir
@@ -0,0 +1,144 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# #stages: 2, unroll count: 3
+
+# the calculation result can be checked as follows (driver code written in C):
+# for (i=2; i<N; i++)
+# func_noswp(i, 1) == func(i, 1)
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr2]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr7:%[0-9]+]], %bb.5, [[ADDXrr4]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr6]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr7]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr7]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr8]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr9:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr8]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr9]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr7]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr10:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[ADDXrr10]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr10]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64 = ADDXrr %12, %11
+ dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve10.mir b/llvm/test/CodeGen/AArch64/sms-mve10.mir
new file mode 100644
index 0000000000000..f94ad7c95bc14
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve10.mir
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# same as sms-mve1.mir except for the order of the operands
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr1]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr2]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr2]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr3]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr4]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr7:%[0-9]+]], %bb.5, [[ADDXrr4]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[PHI3]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr5]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr5]]
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr6]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr7]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr6]]
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr7]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr8]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr9:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[ADDXrr8]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr9]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr7]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr10:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[ADDXrr10]]:gpr64 = ADDXrr [[COPY1]], [[PHI7]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr10]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64 = ADDXrr %11, %12
+ dead $xzr = SUBSXrr %13, %10, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve11.mir b/llvm/test/CodeGen/AArch64/sms-mve11.mir
new file mode 100644
index 0000000000000..ab8cff858c2e6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve11.mir
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# counter increment/compare instruction with immediate operand
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri [[COPY1]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri [[ADDXri]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri1]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXri2:%[0-9]+]]:gpr64common = ADDXri [[ADDXri1]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri2]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXri3:%[0-9]+]]:gpr64common = ADDXri [[ADDXri2]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri3]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXri4:%[0-9]+]]:gpr64common = ADDXri [[COPY1]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri4]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64common = PHI [[ADDXri6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64common = PHI [[ADDXri7:%[0-9]+]], %bb.5, [[ADDXri4]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXri5:%[0-9]+]]:gpr64common = ADDXri [[PHI3]], 1, 0
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri5]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXri6]]:gpr64common = ADDXri [[ADDXri5]], 1, 0
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri6]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXri7]]:gpr64common = ADDXri [[ADDXri6]], 1, 0
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri7]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXri8:%[0-9]+]]:gpr64common = ADDXri [[ADDXri7]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri8]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXri9:%[0-9]+]]:gpr64common = ADDXri [[ADDXri8]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri9]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri7]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64common = PHI [[COPY1]], %bb.3, [[ADDXri7]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64common = PHI [[PHI6]], %bb.7, [[ADDXri10:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[ADDXri10]]:gpr64common = ADDXri [[PHI7]], 1, 0
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[ADDXri10]], 20, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64common = COPY $x0
+ %11:gpr64common = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64common = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64common = ADDXri %12, 1, 0
+ dead $xzr = SUBSXri %13, 20, 0, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve12.mir b/llvm/test/CodeGen/AArch64/sms-mve12.mir
new file mode 100644
index 0000000000000..6fce7b5fb9a8e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve12.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-v1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# the compare instruction is an SVE whilexx (WHILELO) instruction
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: [[CNTD_XPiI:%[0-9]+]]:gpr64common = CNTD_XPiI 31, 1, implicit $vg
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 4, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D1:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr1]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 4, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr1]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D2:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr2]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 4, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr2]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D3:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr3]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 4, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64common = ADDXrr [[COPY1]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D4:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr4]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64common = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64common = PHI [[ADDXrr7:%[0-9]+]], %bb.5, [[ADDXrr4]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64common = ADDXrr [[PHI3]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D5:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr5]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr6]]:gpr64common = ADDXrr [[ADDXrr5]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D6:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr6]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr7]]:gpr64common = ADDXrr [[ADDXrr6]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D7:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr7]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 4, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr7]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D8:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr8]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 4, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr9:%[0-9]+]]:gpr64common = ADDXrr [[ADDXrr8]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D9:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr9]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 4, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D10:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr7]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 4, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64common = PHI [[COPY1]], %bb.3, [[ADDXrr7]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64common = PHI [[PHI6]], %bb.7, [[ADDXrr10:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[ADDXrr10]]:gpr64common = ADDXrr [[PHI7]], [[CNTD_XPiI]]
+ ; CHECK-NEXT: dead [[WHILELO_PXX_D11:%[0-9]+]]:ppr = WHILELO_PXX_D [[ADDXrr10]], [[COPY]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 4, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64common = COPY $x0
+ %11:gpr64common = COPY $x1
+ %20:fpr64 = FMOVDi 1
+ %26:gpr64common = CNTD_XPiI 31, 1, implicit $vg
+
+ bb.1:
+ %12:gpr64common = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64common = ADDXrr %12, %26
+ dead %30:ppr = WHILELO_PXX_D %13, %10, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 4, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve2.mir b/llvm/test/CodeGen/AArch64/sms-mve2.mir
new file mode 100644
index 0000000000000..103f374196977
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve2.mir
@@ -0,0 +1,129 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=4 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# #stages: 2, unroll count: 2
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr2]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr3:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr4:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr6:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr5:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr7:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr3]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr3]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr6]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr3]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5]]:gpr64 = ADDXrr [[ADDXrr4]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr7]]:fpr64 = FADDDrr [[FADDDrr6]], [[FADDDrr3]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr6:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr6]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr3]], [[CSINCXr3]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr4]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr8:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr3]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr5]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr6]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr5]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr7:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr9:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[ADDXrr7]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr9]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr10:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr9]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr9]], %bb.1, [[FADDDrr6]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr11]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64 = ADDXrr %12, %11
+ dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve3.mir b/llvm/test/CodeGen/AArch64/sms-mve3.mir
new file mode 100644
index 0000000000000..017383b46be0c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve3.mir
@@ -0,0 +1,116 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# #stages: 2, unroll count: 1
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr1]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FADDDrr]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr2:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr3:%[0-9]+]], %bb.5, [[ADDXrr2]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr4:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr3]]:gpr64 = ADDXrr [[PHI1]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4]]:fpr64 = FADDDrr [[FADDDrr2]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr2]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[FADDDrr4]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr2]], %bb.6
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr3]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gpr64 = PHI [[PHI4]], %bb.7, [[ADDXrr4:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:fpr64 = PHI [[PHI3]], %bb.7, [[FADDDrr6:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[ADDXrr4]]:gpr64 = ADDXrr [[PHI5]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr6]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI6]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr6]], [[FADDDrr6]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr7]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[FADDDrr6]], %bb.1, [[FADDDrr2]], %bb.6
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[FADDDrr8]], %bb.1, [[FADDDrr5]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI7]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64 = ADDXrr %12, %11
+ dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %21, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %22, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve4.mir b/llvm/test/CodeGen/AArch64/sms-mve4.mir
new file mode 100644
index 0000000000000..203ce6ddfd2ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve4.mir
@@ -0,0 +1,130 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# no dedicated exit
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: dead [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.3, implicit $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.8(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr1]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.6(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FADDDrr]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr2:%[0-9]+]], %bb.6, [[FADDDrr]], %bb.5
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr3:%[0-9]+]], %bb.6, [[ADDXrr2]], %bb.5
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr4:%[0-9]+]], %bb.6, [[FADDDrr1]], %bb.5
+ ; CHECK-NEXT: [[FADDDrr2]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr3]]:gpr64 = ADDXrr [[PHI1]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4]]:fpr64 = FADDDrr [[FADDDrr2]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr2]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.6, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[FADDDrr4]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.8, implicit $nzcv
+ ; CHECK-NEXT: B %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.4, [[FADDDrr2]], %bb.7
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.4, [[ADDXrr3]], %bb.7
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.2, [[FADDDrr5]], %bb.7
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.9(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[PHI4]], %bb.8, [[ADDXrr4:%[0-9]+]], %bb.2
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[PHI3]], %bb.8, [[FADDDrr6:%[0-9]+]], %bb.2
+ ; CHECK-NEXT: [[ADDXrr4]]:gpr64 = ADDXrr [[PHI6]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr6]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI7]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr6]], [[FADDDrr6]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr7]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv
+ ; CHECK-NEXT: B %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.0, [[PHI5]], %bb.9
+ ; CHECK-NEXT: $d0 = COPY [[PHI8]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+ dead %15:gpr64 = SUBSXrr %10, %11, implicit-def $nzcv
+ Bcc 0, %bb.3, implicit $nzcv
+
+ bb.1:
+
+ bb.2:
+ %12:gpr64 = PHI %11, %bb.1, %13, %bb.2
+ %24:fpr64 = PHI %20, %bb.1, %21, %bb.2
+ %13:gpr64 = ADDXrr %12, %11
+ dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %21, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %22, implicit $fpcr
+ Bcc 1, %bb.2, implicit $nzcv
+ B %bb.3
+
+ bb.3:
+ %25:fpr64 = PHI %20, %bb.0, %23, %bb.2
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve5.mir b/llvm/test/CodeGen/AArch64/sms-mve5.mir
new file mode 100644
index 0000000000000..4795df70c07a4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve5.mir
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# exit loop when condition holds
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr2]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr7:%[0-9]+]], %bb.5, [[ADDXrr4]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr6]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr7]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr7]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr8]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr9:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr8]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr9]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr7]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr10:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[ADDXrr10]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr10]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 0, %bb.2, implicit $nzcv
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64 = ADDXrr %12, %11
+ dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 0, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve6.mir b/llvm/test/CodeGen/AArch64/sms-mve6.mir
new file mode 100644
index 0000000000000..527e9e9d09dfc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve6.mir
@@ -0,0 +1,138 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# #stages: 2, unroll count: 3
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr5:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[PHI3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr4]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5]]:gpr64 = ADDXrr [[ADDXrr4]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr6]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr7:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr7]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[ADDXrr5]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr6]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr8:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY]], [[PHI7]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADDXrr8]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ dead $xzr = SUBSXrr %10, %12, implicit-def $nzcv
+ %13:gpr64 = ADDXrr %12, %11
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve7.mir b/llvm/test/CodeGen/AArch64/sms-mve7.mir
new file mode 100644
index 0000000000000..3b3d0ea09eaf0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve7.mir
@@ -0,0 +1,128 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# compare instruction also updates the counter
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSXrr1:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSXrr2:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr1]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSXrr3:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr2]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[SUBSXrr4:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[SUBSXrr6:%[0-9]+]], %bb.5, [[COPY]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[SUBSXrr7:%[0-9]+]], %bb.5, [[SUBSXrr4]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[SUBSXrr5:%[0-9]+]]:gpr64 = SUBSXrr [[PHI3]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[SUBSXrr6]]:gpr64 = SUBSXrr [[SUBSXrr5]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[SUBSXrr7]]:gpr64 = SUBSXrr [[SUBSXrr6]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSXrr8:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr7]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSXrr9:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr8]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[SUBSXrr10:%[0-9]+]]:gpr64 = SUBSXrr [[SUBSXrr6]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY]], %bb.3, [[SUBSXrr7]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[SUBSXrr11:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[SUBSXrr11]]:gpr64 = SUBSXrr [[PHI7]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %10, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %13:gpr64 = SUBSXrr %12, %11, implicit-def $nzcv
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve8.mir b/llvm/test/CodeGen/AArch64/sms-mve8.mir
new file mode 100644
index 0000000000000..c1ea6defac1fb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve8.mir
@@ -0,0 +1,138 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# the compare uses the counter value before it is incremented
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY1]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr1]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr2]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY1]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64 = PHI [[ADDXrr5:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[PHI3]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[PHI3]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI4]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr4]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5]]:gpr64 = ADDXrr [[ADDXrr4]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr5]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[ADDXrr5]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr6]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr7:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr5]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gpr64 = PHI [[COPY1]], %bb.3, [[ADDXrr6]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gpr64 = PHI [[PHI6]], %bb.7, [[ADDXrr8:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[PHI5]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[PHI7]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADDXrr8]]:gpr64 = ADDXrr [[PHI7]], [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI8]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI9]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ dead $xzr = SUBSXrr %12, %11, implicit-def $nzcv
+ %13:gpr64 = ADDXrr %12, %11
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-mve9.mir b/llvm/test/CodeGen/AArch64/sms-mve9.mir
new file mode 100644
index 0000000000000..469a9ecfff8ee
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-mve9.mir
@@ -0,0 +1,152 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-mve-cg -pipeliner-force-ii=3 -mcpu=neoverse-n1 2>&1 | FileCheck %s
+
+# test pipeliner code generation by MVE algorithm
+# COPY instructions exist
+
+...
+---
+name: func
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: func
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 1
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY1]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr1:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr]], [[CSINCXr]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr1]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr2:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr1]], [[CSINCXr1]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr2:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr1]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr2]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr3:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr2]], [[CSINCXr2]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr3]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.4, implicit $nzcv
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.5(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr3:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY2]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64sp = COPY [[ADDXrr3]]
+ ; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr]], [[FMOVDi]], implicit $fpcr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:fpr64 = PHI [[FADDDrr5:%[0-9]+]], %bb.5, [[FMOVDi]], %bb.4
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr64sp = PHI [[COPY7:%[0-9]+]], %bb.5, [[COPY1]], %bb.4
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr64 = PHI [[COPY8:%[0-9]+]], %bb.5, [[COPY2]], %bb.4
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FADDDrr8:%[0-9]+]], %bb.5, [[FADDDrr]], %bb.4
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gpr64 = PHI [[ADDXrr6:%[0-9]+]], %bb.5, [[ADDXrr3]], %bb.4
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gpr64sp = PHI [[COPY9:%[0-9]+]], %bb.5, [[COPY3]], %bb.4
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:fpr64 = PHI [[FADDDrr10:%[0-9]+]], %bb.5, [[FADDDrr1]], %bb.4
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY [[PHI5]]
+ ; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI3]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = FADDDrr [[PHI6]], [[PHI]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr4:%[0-9]+]]:gpr64 = ADDXrr [[COPY4]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY4]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64sp = COPY [[ADDXrr4]]
+ ; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr2]], [[PHI3]], implicit $fpcr
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64 = COPY [[COPY5]]
+ ; CHECK-NEXT: [[FADDDrr5]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr6:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr4]], [[PHI3]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr5:%[0-9]+]]:gpr64 = ADDXrr [[COPY6]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY6]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[COPY7]]:gpr64sp = COPY [[ADDXrr5]]
+ ; CHECK-NEXT: [[FADDDrr7:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr5]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[COPY8]]:gpr64 = COPY [[COPY7]]
+ ; CHECK-NEXT: [[FADDDrr8]]:fpr64 = FADDDrr [[FMOVDi]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr9:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr7]], [[FADDDrr2]], implicit $fpcr
+ ; CHECK-NEXT: [[ADDXrr6]]:gpr64 = ADDXrr [[COPY8]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY8]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[COPY9]]:gpr64sp = COPY [[ADDXrr6]]
+ ; CHECK-NEXT: [[FADDDrr10]]:fpr64 = FADDDrr [[FADDDrr8]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: [[CSINCXr4:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr6]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr5:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr4]], [[CSINCXr4]], 1, implicit $nzcv
+ ; CHECK-NEXT: [[ADDXrr7:%[0-9]+]]:gpr64 = ADDXrr [[ADDXrr6]], [[COPY1]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[ADDXrr7]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr6:%[0-9]+]]:gpr64common = CSINCXr [[CSINCXr5]], [[CSINCXr5]], 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr6]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.5, implicit $nzcv
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[FADDDrr11:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr10]], [[FADDDrr5]], implicit $fpcr
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY8]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCXr7:%[0-9]+]]:gpr64common = CSINCXr $xzr, $xzr, 1, implicit $nzcv
+ ; CHECK-NEXT: dead $xzr = SUBSXri [[CSINCXr7]], 0, 0, implicit-def $nzcv
+ ; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.3, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gpr64common = PHI [[COPY1]], %bb.3, [[COPY9]], %bb.6
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:gpr64sp = PHI [[PHI8]], %bb.7, [[COPY11:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:fpr64 = PHI [[PHI7]], %bb.7, [[FADDDrr12:%[0-9]+]], %bb.1
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr64 = COPY [[PHI9]]
+ ; CHECK-NEXT: dead $xzr = SUBSXrr [[COPY10]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADDXrr8:%[0-9]+]]:gpr64 = ADDXrr [[COPY10]], [[COPY1]]
+ ; CHECK-NEXT: [[COPY11]]:gpr64sp = COPY [[ADDXrr8]]
+ ; CHECK-NEXT: [[FADDDrr12]]:fpr64 = FADDDrr [[FMOVDi]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr13:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr12]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: [[FADDDrr14:%[0-9]+]]:fpr64 = FADDDrr [[FADDDrr13]], [[PHI10]], implicit $fpcr
+ ; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: [[PHI11:%[0-9]+]]:fpr64 = PHI [[FADDDrr12]], %bb.1, [[FADDDrr8]], %bb.6
+ ; CHECK-NEXT: [[PHI12:%[0-9]+]]:fpr64 = PHI [[FADDDrr14]], %bb.1, [[FADDDrr11]], %bb.6
+ ; CHECK-NEXT: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[PHI11]], [[PHI12]], implicit $fpcr
+ ; CHECK-NEXT: $d0 = COPY [[FMULDrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+ %20:fpr64 = FMOVDi 1
+
+ bb.1:
+ %12:gpr64sp = PHI %11, %bb.0, %15, %bb.1
+ %24:fpr64 = PHI %20, %bb.0, %21, %bb.1
+ %14:gpr64 = COPY %12
+ dead $xzr = SUBSXrr %14, %11, implicit-def $nzcv
+ %13:gpr64 = ADDXrr %14, %11
+ %15:gpr64sp = COPY %13
+ %21:fpr64 = FADDDrr %20, %24, implicit $fpcr
+ %22:fpr64 = FADDDrr %21, %24, implicit $fpcr
+ %23:fpr64 = FADDDrr %22, %24, implicit $fpcr
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+ %25:fpr64 = FMULDrr %21, %23, implicit $fpcr
+ $d0 = COPY %25
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-unacceptable-loop3.mir b/llvm/test/CodeGen/AArch64/sms-unacceptable-loop3.mir
new file mode 100644
index 0000000000000..e6d86859a41b9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sms-unacceptable-loop3.mir
@@ -0,0 +1,109 @@
+# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -debug-only=pipeliner 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# unacceptable loops by pipeliner
+
+...
+---
+name: func1
+tracksRegLiveness: true
+body: |
+ ; multiple counter increment instructions
+ ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %13:gpr64 = ADDXrr %12, %11
+ %14:gpr64 = ADDXrr %13, %11
+ dead $xzr = SUBSXrr %10, %14, implicit-def $nzcv
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+...
+---
+name: func2
+tracksRegLiveness: true
+body: |
+ ; neither operand of the increment instruction is a loop invariant value
+ ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %13:gpr64 = ADDXrr %12, %12
+ dead $xzr = SUBSXrr %10, %13, implicit-def $nzcv
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+...
+---
+name: func3
+tracksRegLiveness: true
+body: |
+ ; neither operand of the compare instruction is a loop invariant value
+ ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %13:gpr64 = ADDXrr %12, %11
+ dead $xzr = SUBSXrr %13, %13, implicit-def $nzcv
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+...
+---
+name: func4
+tracksRegLiveness: true
+body: |
+ ; multiple phi instructions
+ ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %14:gpr64 = PHI %11, %bb.0, %15, %bb.1
+ dead $xzr = SUBSXrr %12, %10, implicit-def $nzcv
+ %13:gpr64 = ADDXrr %14, %11
+ %15:gpr64 = ADDXrr %12, %11
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+...
+---
+name: func5
+tracksRegLiveness: true
+body: |
+ ; not an increment instruction
+ ; CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+ bb.0.entry:
+ liveins: $x0, $x1
+ %10:gpr64 = COPY $x0
+ %11:gpr64 = COPY $x1
+
+ bb.1:
+ %12:gpr64 = PHI %11, %bb.0, %13, %bb.1
+ %13:gpr64 = ORRXrr %12, %12
+ dead $xzr = SUBSXrr %12, %10, implicit-def $nzcv
+ Bcc 1, %bb.1, implicit $nzcv
+ B %bb.2
+
+ bb.2:
+...
diff --git a/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir b/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir
index c5b76d88ff00d..290f7027244f2 100644
--- a/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir
+++ b/llvm/test/CodeGen/AArch64/sms-unpipeline-insts2.mir
@@ -1,9 +1,12 @@
# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-enable-copytophi=0 -debug-only=pipeliner 2>&1 | FileCheck %s
# REQUIRES: asserts
-# An acceptable loop by pipeliner TBB == LoopBB, FBB == ExitBB, Compare and branch
-# CHECK: SU([[SU0:[0-9]+]]): [[V0:%[0-9]+]]:gpr64common = SUBXri [[V1:%[0-9]+]]:gpr64common, 1, 0
-# CHECK: Do not pipeline SU([[SU0:[0-9]+]])
+# Compare and branch instructions are not supported now.
+# CHECK: Unable to analyzeLoop, can NOT pipeline Loop
+
+# (if supported) Check that instructions for loop control are not pipelined when compare and branch is used.
+# CHECK(if supported): SU([[SU0:[0-9]+]]): [[V0:%[0-9]+]]:gpr64common = SUBXri [[V1:%[0-9]+]]:gpr64common, 1, 0
+# CHECK(if supported): Do not pipeline SU([[SU0:[0-9]+]])
--- |
define dso_local void @func(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr #0 {