[llvm] fa49021 - Revert "[CodeGen][ARM] Enable Swing Module Scheduling for ARM"

David Penry via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 28 13:29:50 PDT 2022


Author: David Penry
Date: 2022-04-28T13:29:27-07:00
New Revision: fa49021c68ef7a7adcdf7b8a44b9006506523191

URL: https://github.com/llvm/llvm-project/commit/fa49021c68ef7a7adcdf7b8a44b9006506523191
DIFF: https://github.com/llvm/llvm-project/commit/fa49021c68ef7a7adcdf7b8a44b9006506523191.diff

LOG: Revert "[CodeGen][ARM] Enable Swing Module Scheduling for ARM"

This reverts commit 28d09bbbc3d09c912b54a4d5edb32cab7de32a6f
while I investigate a buildbot failure.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/MachinePipeliner.h
    llvm/include/llvm/CodeGen/ModuloSchedule.h
    llvm/lib/CodeGen/MachinePipeliner.cpp
    llvm/lib/CodeGen/ModuloSchedule.cpp
    llvm/lib/Target/ARM/ARM.td
    llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/lib/Target/ARM/ARMBaseInstrInfo.h
    llvm/lib/Target/ARM/ARMSubtarget.cpp
    llvm/lib/Target/ARM/ARMSubtarget.h
    llvm/lib/Target/ARM/ARMTargetMachine.cpp
    llvm/test/CodeGen/ARM/O3-pipeline.ll

Removed: 
    llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
    llvm/test/CodeGen/Thumb2/swp-fixedii.mir


################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 4559f7a9bde7..170f20182001 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -84,8 +84,6 @@ class MachinePipeliner : public MachineFunctionPass {
     SmallVector<MachineOperand, 4> BrCond;
     MachineInstr *LoopInductionVar = nullptr;
     MachineInstr *LoopCompare = nullptr;
-    std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
-        nullptr;
   };
   LoopInfo LI;
 
@@ -121,7 +119,6 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
   LiveIntervals &LIS;
   const RegisterClassInfo &RegClassInfo;
   unsigned II_setByPragma = 0;
-  TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
 
   /// A toplogical ordering of the SUnits, which is needed for changing
   /// dependences and iterating over the SUnits.
@@ -199,11 +196,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
 
 public:
   SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
-                    const RegisterClassInfo &rci, unsigned II,
-                    TargetInstrInfo::PipelinerLoopInfo *PLI)
+                    const RegisterClassInfo &rci, unsigned II)
       : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
-        RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
-        Topo(SUnits, &ExitSU) {
+        RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
     P.MF->getSubtarget().getSMSMutations(Mutations);
     if (SwpEnableCopyToPhi)
       Mutations.push_back(std::make_unique<CopyToPhiMutation>());
@@ -594,13 +589,6 @@ class SMSchedule {
     return ScheduledInstrs[cycle];
   }
 
-  SmallSet<SUnit *, 8>
-  computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
-                             TargetInstrInfo::PipelinerLoopInfo *PLI);
-
-  bool
-  normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
-                                    TargetInstrInfo::PipelinerLoopInfo *PLI);
   bool isValidSchedule(SwingSchedulerDAG *SSD);
   void finalizeSchedule(SwingSchedulerDAG *SSD);
   void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,

diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index c515101e80fd..4045df807cdb 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -191,8 +191,8 @@ class ModuloScheduleExpander {
   void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
                       ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
   void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
-                      MachineBasicBlock *OrigBB, ValueMapTy *VRMap,
-                      MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
+                      ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
+                      MBBVectorTy &PrologBBs);
   void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
                             MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
                             ValueMapTy *VRMap, InstrMapTy &InstrMap,

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 9ea6e9b98172..0bffa9154fc4 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -255,7 +255,6 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
              << "Failed to pipeline loop";
     });
 
-    LI.LoopPipelinerInfo.reset();
     return Changed;
   }
 
@@ -263,7 +262,6 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
 
   Changed = swingModuloScheduler(L);
 
-  LI.LoopPipelinerInfo.reset();
   return Changed;
 }
 
@@ -356,8 +354,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
 
   LI.LoopInductionVar = nullptr;
   LI.LoopCompare = nullptr;
-  LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
-  if (!LI.LoopPipelinerInfo) {
+  if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
     LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
     NumFailLoop++;
     ORE->emit([&]() {
@@ -422,7 +419,7 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
   assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
 
   SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
-                        II_setByPragma, LI.LoopPipelinerInfo.get());
+                        II_setByPragma);
 
   MachineBasicBlock *MBB = L.getHeader();
   // The kernel should not include any terminator instructions.  These
@@ -1425,7 +1422,7 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
 /// in the calculation of the ASAP, ALAP, etc functions.
 static bool ignoreDependence(const SDep &D, bool isPred) {
-  if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
+  if (D.isArtificial())
     return true;
   return D.getKind() == SDep::Anti && isPred;
 }
@@ -1474,8 +1471,6 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
     SUnit *SU = &SUnits[I];
     for (const SDep &S : SU->Succs) {
       SUnit *succ = S.getSUnit();
-      if (succ->isBoundaryNode())
-        continue;
       if (S.getLatency() == 0)
         zeroLatencyHeight =
             std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
@@ -1793,8 +1788,7 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
   NodesAdded.insert(SU);
   for (auto &SI : SU->Succs) {
     SUnit *Successor = SI.getSUnit();
-    if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
-        NodesAdded.count(Successor) == 0)
+    if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
       addConnectedNodes(Successor, NewSet, NodesAdded);
   }
   for (auto &PI : SU->Preds) {
@@ -2086,11 +2080,6 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
       });
     } while (++NI != NE && scheduleFound);
 
-    // If a schedule is found, ensure non-pipelined instructions are in stage 0
-    if (scheduleFound)
-      scheduleFound =
-          Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);
-
     // If a schedule is found, check if it is a valid schedule too.
     if (scheduleFound)
       scheduleFound = Schedule.isValidSchedule(this);
@@ -2274,7 +2263,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
                                          bool isSucc) {
   if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
-      Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
+      Dep.isArtificial())
     return false;
 
   if (!SwpPruneLoopCarried)
@@ -2441,7 +2430,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
   while (!Worklist.empty()) {
     const SDep &Cur = Worklist.pop_back_val();
     SUnit *SuccSU = Cur.getSUnit();
-    if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
+    if (Visited.count(SuccSU))
       continue;
     std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
     if (it == InstrToCycle.end())
@@ -2708,91 +2697,21 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
   return false;
 }
 
-/// Determine transitive dependences of unpipelineable instructions
-SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
-    SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
-  SmallSet<SUnit *, 8> DoNotPipeline;
-  SmallVector<SUnit *, 8> Worklist;
-
-  for (auto &SU : SSD->SUnits)
-    if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
-      Worklist.push_back(&SU);
-
-  while (!Worklist.empty()) {
-    auto SU = Worklist.pop_back_val();
-    if (DoNotPipeline.count(SU))
-      continue;
-    LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
-    DoNotPipeline.insert(SU);
-    for (auto &Dep : SU->Preds)
-      Worklist.push_back(Dep.getSUnit());
-    if (SU->getInstr()->isPHI())
-      for (auto &Dep : SU->Succs)
-        if (Dep.getKind() == SDep::Anti)
-          Worklist.push_back(Dep.getSUnit());
-  }
-  return DoNotPipeline;
-}
-
-// Determine all instructions upon which any unpipelineable instruction depends
-// and ensure that they are in stage 0.  If unable to do so, return false.
-bool SMSchedule::normalizeNonPipelinedInstructions(
-    SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
-  SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI);
-
-  int NewLastCycle = INT_MIN;
-  for (SUnit &SU : SSD->SUnits) {
-    if (!SU.isInstr())
-      continue;
-    if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) {
-      NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
-      continue;
-    }
-
-    // Put the non-pipelined instruction as early as possible in the schedule
-    int NewCycle = getFirstCycle();
-    for (auto &Dep : SU.Preds)
-      NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
-
-    int OldCycle = InstrToCycle[&SU];
-    if (OldCycle != NewCycle) {
-      InstrToCycle[&SU] = NewCycle;
-      auto &OldS = getInstructions(OldCycle);
-      OldS.erase(std::remove(OldS.begin(), OldS.end(), &SU), OldS.end());
-      getInstructions(NewCycle).emplace_back(&SU);
-      LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
-                        << ") is not pipelined; moving from cycle " << OldCycle
-                        << " to " << NewCycle << " Instr:" << *SU.getInstr());
-    }
-    NewLastCycle = std::max(NewLastCycle, NewCycle);
-  }
-  LastCycle = NewLastCycle;
-  return true;
-}
-
 // Check if the generated schedule is valid. This function checks if
 // an instruction that uses a physical register is scheduled in a
 // different stage than the definition. The pipeliner does not handle
 // physical register values that may cross a basic block boundary.
-// Furthermore, if a physical def/use pair is assigned to the same
-// cycle, orderDependence does not guarantee def/use ordering, so that
-// case should be considered invalid.  (The test checks for both
-// earlier and same-cycle use to be more robust.)
 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
   for (SUnit &SU : SSD->SUnits) {
     if (!SU.hasPhysRegDefs)
       continue;
     int StageDef = stageScheduled(&SU);
-    int CycleDef = InstrToCycle[&SU];
     assert(StageDef != -1 && "Instruction should have been scheduled.");
     for (auto &SI : SU.Succs)
-      if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
-        if (Register::isPhysicalRegister(SI.getReg())) {
+      if (SI.isAssignedRegDep())
+        if (Register::isPhysicalRegister(SI.getReg()))
           if (stageScheduled(SI.getSUnit()) != StageDef)
             return false;
-          if (InstrToCycle[SI.getSUnit()] <= CycleDef)
-            return false;
-        }
   }
   return true;
 }

diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 20aecdf222e2..b974fa9846f2 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -158,7 +158,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
 
   SmallVector<MachineBasicBlock *, 4> EpilogBBs;
   // Generate the epilog instructions to complete the pipeline.
-  generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
+  generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
 
   // We need this step because the register allocation doesn't handle some
   // situations well, so we insert copies to help out.
@@ -240,9 +240,11 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
 /// Generate the pipeline epilog code. The epilog code finishes the iterations
 /// that were started in either the prolog or the kernel.  We create a basic
 /// block for each stage that needs to complete.
-void ModuloScheduleExpander::generateEpilog(
-    unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
-    ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
+void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
+                                            MachineBasicBlock *KernelBB,
+                                            ValueMapTy *VRMap,
+                                            MBBVectorTy &EpilogBBs,
+                                            MBBVectorTy &PrologBBs) {
   // We need to change the branch from the kernel to the first epilog block, so
   // this call to analyze branch uses the kernel rather than the original BB.
   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -312,12 +314,7 @@ void ModuloScheduleExpander::generateEpilog(
   // Create a branch to the new epilog from the kernel.
   // Remove the original branch and add a new branch to the epilog.
   TII->removeBranch(*KernelBB);
-  assert((OrigBB == TBB || OrigBB == FBB) &&
-         "Unable to determine looping branch direction");
-  if (OrigBB != TBB)
-    TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
-  else
-    TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
   // Add a branch to the loop exit.
   if (EpilogBBs.size() > 0) {
     MachineBasicBlock *LastEpilogBB = EpilogBBs.back();

diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 4bcba38efe02..d9bc2827f7d2 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -494,10 +494,6 @@ def FeatureNoNegativeImmediates
 def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true",
                                         "Use the MachineScheduler">;
 
-// Use the MachinePipeliner for instruction scheduling for the subtarget.
-def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true",
-                                            "Use the MachinePipeliner">;
-
 // False if scheduling should happen again after register allocation.
 def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
     "DisablePostRAScheduler", "true",
@@ -1399,7 +1395,6 @@ def : ProcessorModel<"cortex-m4", CortexM4Model,        [ARMv7em,
 def : ProcessorModel<"cortex-m7", CortexM7Model,        [ARMv7em,
                                                          ProcM7,
                                                          FeatureFPARMv8_D16,
-                                                         FeatureUseMIPipeliner,
                                                          FeatureUseMISched]>;
 
 def : ProcNoItin<"cortex-m23",                          [ARMv8mBaseline,

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 28dd630ed23b..9bb9df536b2e 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -6721,78 +6721,3 @@ unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
   return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
                                                           : ARM::BLX_pred;
 }
-
-namespace {
-class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
-  MachineInstr *Loop, *EndLoop, *LoopCount;
-  MachineFunction *MF;
-  const TargetInstrInfo *TII;
-
-  // Meanings of the various stuff with loop types:
-  // t2Bcc:
-  //   Loop = null -- there is no setup.
-  //   EndLoop = branch at end of original BB that will become a kernel
-  //   LoopCount = CC setter live into branch
-public:
-  ARMPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
-                       MachineInstr *LoopCount)
-      : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
-        MF(EndLoop->getParent()->getParent()),
-        TII(MF->getSubtarget().getInstrInfo()) {}
-
-  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
-    // Only ignore the terminator.
-    return MI == EndLoop || MI == LoopCount;
-  }
-
-  Optional<bool> createTripCountGreaterCondition(
-      int TC, MachineBasicBlock &MBB,
-      SmallVectorImpl<MachineOperand> &Cond) override {
-
-    if (isCondBranchOpcode(EndLoop->getOpcode())) {
-      Cond.push_back(EndLoop->getOperand(1));
-      Cond.push_back(EndLoop->getOperand(2));
-      if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
-        TII->reverseBranchCondition(Cond);
-      }
-      return {};
-    } else
-      llvm_unreachable("Unknown EndLoop");
-  }
-
-  void setPreheader(MachineBasicBlock *NewPreheader) override {}
-
-  void adjustTripCount(int TripCountAdjust) override {}
-
-  void disposed() override {}
-};
-} // namespace
-
-std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
-ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
-  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
-  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
-  if (Preheader == LoopBB)
-    Preheader = *std::next(LoopBB->pred_begin());
-
-  if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
-    // If the branch is a Bcc, then the CPSR should be set somewhere within the
-    // block.  We need to determine the reaching definition of CPSR so that
-    // it can be marked as non-pipelineable, allowing the pipeliner to force
-    // it into stage 0 or give up if it cannot or will not do so.
-    MachineInstr *CCSetter = nullptr;
-    for (auto &L : LoopBB->instrs()) {
-      if (L.isCall())
-        return nullptr;
-      if (isCPSRDefined(L))
-        CCSetter = &L;
-    }
-    if (CCSetter)
-      return std::make_unique<ARMPipelinerLoopInfo>(nullptr, &*I, CCSetter);
-    else
-      return nullptr; // Unable to find the CC setter, so unable to guarantee
-                      // that pipeline will work
-  }
-
-  return nullptr;
-}

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 40acb27d1eb1..ab9643592724 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -372,11 +372,6 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
            MI->getOpcode() == ARM::t2WhileLoopStartTP;
   }
 
-  /// Analyze loop L, which must be a single-basic-block loop, and if the
-  /// conditions can be understood enough produce a PipelinerLoopInfo object.
-  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
-  analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
-
 private:
   /// Returns an unused general-purpose register which can be used for
   /// constructing an outlined call if one exists. Returns 0 otherwise.

diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 89e5b8762d80..b62f447e8d58 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -393,14 +393,6 @@ bool ARMSubtarget::enableSubRegLiveness() const {
   return hasMVEIntegerOps();
 }
 
-bool ARMSubtarget::enableMachinePipeliner() const {
-  // Enable the MachinePipeliner before register allocation for subtargets
-  // with the use-mipipeliner feature.
-  return getSchedModel().hasInstrSchedModel() && useMachinePipeliner();
-}
-
-bool ARMSubtarget::useDFAforSMS() const { return false; }
-
 // This overrides the PostRAScheduler bit in the SchedModel for any CPU.
 bool ARMSubtarget::enablePostRAScheduler() const {
   if (enableMachineScheduler())

diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 8d56c70e8094..d426157c5453 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -417,7 +417,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   bool isRWPI() const;
 
   bool useMachineScheduler() const { return UseMISched; }
-  bool useMachinePipeliner() const { return UseMIPipeliner; }
   bool hasMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
   bool isThumb2() const { return isThumb() && hasThumb2(); }
@@ -466,10 +465,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
   /// Returns true if machine scheduler should be enabled.
   bool enableMachineScheduler() const override;
 
-  /// Returns true if machine pipeliner should be enabled.
-  bool enableMachinePipeliner() const override;
-  bool useDFAforSMS() const override;
-
   /// True for some subtargets at > -O0.
   bool enablePostRAScheduler() const override;
 

diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index dadf7a557238..401a00841747 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -509,9 +509,6 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
 
 void ARMPassConfig::addPreRegAlloc() {
   if (getOptLevel() != CodeGenOpt::None) {
-    if (getOptLevel() == CodeGenOpt::Aggressive)
-      addPass(&MachinePipelinerID);
-
     addPass(createMVETPAndVPTOptimisationsPass());
 
     addPass(createMLxExpansionPass());

diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
index 6e5513458111..10c56a3c495b 100644
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -95,13 +95,6 @@
 ; CHECK-NEXT:      Peephole Optimizations
 ; CHECK-NEXT:      Remove dead machine instructions
 ; CHECK-NEXT:      MachineDominator Tree Construction
-; CHECK-NEXT:      Slot index numbering
-; CHECK-NEXT:      Live Interval Analysis
-; CHECK-NEXT:      Lazy Machine Block Frequency Analysis
-; CHECK-NEXT:      Machine Optimization Remark Emitter
-; CHECK-NEXT:      Modulo Software Pipelining
-; CHECK-NEXT:      MachineDominator Tree Construction
-; CHECK-NEXT:      Machine Natural Loop Construction
 ; CHECK-NEXT:      MVE TailPred and VPT Optimisation Pass
 ; CHECK-NEXT:      ARM MLA / MLS expansion pass
 ; CHECK-NEXT:      MachineDominator Tree Construction

diff --git a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
deleted file mode 100644
index 3f3ff5e4bd29..000000000000
--- a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
+++ /dev/null
@@ -1,203 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
-
---- |
-  define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {
-  entry:
-    %cmp8 = icmp sgt i32 %sz, 0
-    br i1 %cmp8, label %for.body.preheader, label %for.end
-
-  for.body.preheader:                               ; preds = %entry
-    %scevgep = getelementptr float, float* %b, i32 -1
-    %scevgep4 = getelementptr float, float* %a, i32 -1
-    br label %for.body
-
-  for.body:                                         ; preds = %for.body.preheader, %for.body
-    %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
-    %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
-    %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
-    %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
-    %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
-    %0 = load float, float* %scevgep7, align 4
-    %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1
-    %1 = load float, float* %scevgep3, align 4
-    %mul = fmul fast float %1, %0
-    %add = fadd fast float %mul, %sum.010
-    %lsr.iv.next = add i32 %lsr.iv, -1
-    %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1
-    %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1
-    %exitcond.not = icmp ne i32 %lsr.iv.next, 0
-    br i1 %exitcond.not, label %for.body, label %for.end, !llvm.loop !0
-
-  for.end:                                          ; preds = %for.body, %entry
-    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-    ret float %sum.0.lcssa
-  }
-
-  !0 = distinct !{!0, !1, !2, !3}
-  !1 = !{!"llvm.loop.mustprogress"}
-  !2 = !{!"llvm.loop.unroll.disable"}
-  !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
-
-...
----
-name:            dot
-alignment:       2
-tracksRegLiveness: true
-constants:
-  - id:              0
-    value:           'float 0.000000e+00'
-    alignment:       4
-    isTargetSpecific: false
-body:             |
-  ; CHECK-LABEL: name: dot
-  ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
-  ; CHECK-NEXT:   liveins: $r0, $r1, $r2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnopc = COPY $r2
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gprnopc = COPY $r1
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gprnopc = COPY $r0
-  ; CHECK-NEXT:   t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK-NEXT:   t2Bcc %bb.2, 10 /* CC::ge */, $cpsr
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.for.body.preheader:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gprnopc = COPY [[t2SUBri]]
-  ; CHECK-NEXT:   [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gprnopc = COPY [[t2SUBri1]]
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.for.body:
-  ; CHECK-NEXT:   successors: %bb.6(0x40000000), %bb.9(0x40000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY4]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
-  ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:gprnopc = COPY [[t2ADDri]]
-  ; CHECK-NEXT:   t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
-  ; CHECK-NEXT:   t2B %bb.6, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.for.body:
-  ; CHECK-NEXT:   successors: %bb.7(0x80000000), %bb.8(0x00000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[COPY7]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
-  ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
-  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
-  ; CHECK-NEXT:   t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
-  ; CHECK-NEXT:   t2B %bb.7, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.7.for.body:
-  ; CHECK-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %49, %bb.7
-  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %50, %bb.7
-  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %51, %bb.7
-  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
-  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
-  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
-  ; CHECK-NEXT:   [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
-  ; CHECK-NEXT:   [[t2ADDri4:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[t2ADDri5:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
-  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
-  ; CHECK-NEXT:   t2B %bb.7, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.8:
-  ; CHECK-NEXT:   successors: %bb.9(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
-  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
-  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.9:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
-  ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
-  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.for.end:
-  ; CHECK-NEXT:   [[PHI11:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS2]], %bb.9
-  ; CHECK-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI11]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   $r0 = COPY [[VMOVRS]]
-  ; CHECK-NEXT:   tBX_RET 14 /* CC::al */, $noreg, implicit $r0
-  bb.0.entry:
-    successors: %bb.1(0x50000000), %bb.4(0x30000000)
-    liveins: $r0, $r1, $r2
-
-    %13:gprnopc = COPY $r2
-    %12:gprnopc = COPY $r1
-    %11:gprnopc = COPY $r0
-    t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    t2Bcc %bb.1, 10 /* CC::ge */, $cpsr
-
-  bb.4:
-    successors: %bb.3(0x80000000)
-
-    %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-    t2B %bb.3, 14 /* CC::al */, $noreg
-
-  bb.1.for.body.preheader:
-    successors: %bb.2(0x80000000)
-
-    %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg
-    %0:gpr = COPY %16
-    %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg
-    %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-    %1:gpr = COPY %17
-
-  bb.2.for.body:
-    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
-
-    %2:gprnopc = PHI %1, %bb.1, %9, %bb.2
-    %3:gprnopc = PHI %0, %bb.1, %8, %bb.2
-    %4:gprnopc = PHI %13, %bb.1, %7, %bb.2
-    %5:spr = PHI %15, %bb.1, %6, %bb.2
-    %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg
-    %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
-    %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
-    %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
-    %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
-    %7:gpr = COPY %23
-    %8:gpr = COPY %20
-    %9:gpr = COPY %18
-    t2Bcc %bb.3, 0 /* CC::eq */, $cpsr
-    t2B %bb.2, 14 /* CC::al */, $noreg
-
-  bb.3.for.end:
-    %10:spr = PHI %14, %bb.4, %6, %bb.2
-    %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg
-    $r0 = COPY %24
-    tBX_RET 14 /* CC::al */, $noreg, implicit $r0
-
-...

diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
deleted file mode 100644
index 579123c48a1f..000000000000
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
+++ /dev/null
@@ -1,203 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
-
---- |
-  define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {
-  entry:
-    %cmp8 = icmp sgt i32 %sz, 0
-    br i1 %cmp8, label %for.body.preheader, label %for.end
-
-  for.body.preheader:                               ; preds = %entry
-    %scevgep = getelementptr float, float* %b, i32 -1
-    %scevgep4 = getelementptr float, float* %a, i32 -1
-    br label %for.body
-
-  for.body:                                         ; preds = %for.body.preheader, %for.body
-    %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
-    %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
-    %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
-    %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
-    %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
-    %0 = load float, float* %scevgep7, align 4
-    %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1
-    %1 = load float, float* %scevgep3, align 4
-    %mul = fmul fast float %1, %0
-    %add = fadd fast float %mul, %sum.010
-    %lsr.iv.next = add i32 %lsr.iv, -1
-    %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1
-    %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1
-    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
-    br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
-  for.end:                                          ; preds = %for.body, %entry
-    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-    ret float %sum.0.lcssa
-  }
-
-  !0 = distinct !{!0, !1, !2, !3}
-  !1 = !{!"llvm.loop.mustprogress"}
-  !2 = !{!"llvm.loop.unroll.disable"}
-  !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
-
-...
----
-name:            dot
-alignment:       2
-tracksRegLiveness: true
-constants:
-  - id:              0
-    value:           'float 0.000000e+00'
-    alignment:       4
-    isTargetSpecific: false
-body:             |
-  ; CHECK-LABEL: name: dot
-  ; CHECK: bb.0.entry:
-  ; CHECK-NEXT:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
-  ; CHECK-NEXT:   liveins: $r0, $r1, $r2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnopc = COPY $r2
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gprnopc = COPY $r1
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gprnopc = COPY $r0
-  ; CHECK-NEXT:   t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-  ; CHECK-NEXT:   t2Bcc %bb.2, 10 /* CC::ge */, $cpsr
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2.for.body.preheader:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gprnopc = COPY [[t2SUBri]]
-  ; CHECK-NEXT:   [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gprnopc = COPY [[t2SUBri1]]
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5.for.body:
-  ; CHECK-NEXT:   successors: %bb.6(0x40000000), %bb.9(0x40000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY4]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
-  ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:gprnopc = COPY [[t2ADDri]]
-  ; CHECK-NEXT:   t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
-  ; CHECK-NEXT:   t2B %bb.6, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6.for.body:
-  ; CHECK-NEXT:   successors: %bb.7(0x80000000), %bb.8(0x00000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[COPY7]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
-  ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
-  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
-  ; CHECK-NEXT:   t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
-  ; CHECK-NEXT:   t2B %bb.7, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.7.for.body:
-  ; CHECK-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %49, %bb.7
-  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %50, %bb.7
-  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %51, %bb.7
-  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
-  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
-  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
-  ; CHECK-NEXT:   [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
-  ; CHECK-NEXT:   [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
-  ; CHECK-NEXT:   [[t2ADDri4:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[t2ADDri5:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
-  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
-  ; CHECK-NEXT:   t2B %bb.8, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.8:
-  ; CHECK-NEXT:   successors: %bb.9(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
-  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
-  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
-  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.9:
-  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
-  ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
-  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.for.end:
-  ; CHECK-NEXT:   [[PHI11:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS2]], %bb.9
-  ; CHECK-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI11]], 14 /* CC::al */, $noreg
-  ; CHECK-NEXT:   $r0 = COPY [[VMOVRS]]
-  ; CHECK-NEXT:   tBX_RET 14 /* CC::al */, $noreg, implicit $r0
-  bb.0.entry:
-    successors: %bb.1(0x50000000), %bb.4(0x30000000)
-    liveins: $r0, $r1, $r2
-
-    %13:gprnopc = COPY $r2
-    %12:gprnopc = COPY $r1
-    %11:gprnopc = COPY $r0
-    t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
-    t2Bcc %bb.1, 10 /* CC::ge */, $cpsr
-
-  bb.4:
-    successors: %bb.3(0x80000000)
-
-    %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-    t2B %bb.3, 14 /* CC::al */, $noreg
-
-  bb.1.for.body.preheader:
-    successors: %bb.2(0x80000000)
-
-    %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg
-    %0:gpr = COPY %16
-    %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg
-    %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
-    %1:gpr = COPY %17
-
-  bb.2.for.body:
-    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
-
-    %2:gprnopc = PHI %1, %bb.1, %9, %bb.2
-    %3:gprnopc = PHI %0, %bb.1, %8, %bb.2
-    %4:gprnopc = PHI %13, %bb.1, %7, %bb.2
-    %5:spr = PHI %15, %bb.1, %6, %bb.2
-    %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg
-    %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
-    %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
-    %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
-    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
-    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
-    %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
-    %7:gpr = COPY %23
-    %8:gpr = COPY %20
-    %9:gpr = COPY %18
-    t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
-    t2B %bb.3, 14 /* CC::al */, $noreg
-
-  bb.3.for.end:
-    %10:spr = PHI %14, %bb.4, %6, %bb.2
-    %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg
-    $r0 = COPY %24
-    tBX_RET 14 /* CC::al */, $noreg, implicit $r0
-
-...


        


More information about the llvm-commits mailing list