[llvm] 8c4aea4 - [ModuloSchedule] Add interface call to accept/reject SMS schedules

David Penry via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 17 08:13:43 PDT 2022


Author: David Penry
Date: 2022-08-17T08:13:26-07:00
New Revision: 8c4aea438c310816bb4e4f9a32d783381ef3182e

URL: https://github.com/llvm/llvm-project/commit/8c4aea438c310816bb4e4f9a32d783381ef3182e
DIFF: https://github.com/llvm/llvm-project/commit/8c4aea438c310816bb4e4f9a32d783381ef3182e.diff

LOG: [ModuloSchedule] Add interface call to accept/reject SMS schedules

This interface allows a target to reject a proposed
SMS schedule.  For Hexagon and PowerPC, all schedules
are accepted, leaving behavior unchanged.  For ARM,
schedules that exceed the register pressure limits are
rejected.

Also, two RegPressureTracker methods are now public so that
register pressure can be computed by more callers.
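
For targets that want to use the hook, the pattern is to override it in the
target's PipelinerLoopInfo implementation.  A minimal sketch, assuming a
hypothetical MyTargetPipelinerLoopInfo and profitability check that are not
part of this patch (the ARM changes below are the real in-tree user):

    #include "llvm/CodeGen/MachinePipeliner.h"  // SwingSchedulerDAG, SMSchedule
    #include "llvm/CodeGen/TargetInstrInfo.h"
    using namespace llvm;

    namespace {
    class MyTargetPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
      // The remaining pure-virtual PipelinerLoopInfo members would still have
      // to be implemented; they are elided here to keep the sketch short.

      // New hook added by this patch: returning false makes the pipeliner
      // discard the proposed schedule and leave the loop unpipelined.
      bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
        // Hypothetical target-specific quality heuristic (not a real helper).
        return scheduleLooksProfitable(SSD, SMS);
      }
    };
    } // namespace

MachinePipeliner calls the hook only after a schedule has been found (see the
MachinePipeliner.cpp hunk below) and treats a false return the same as not
having found a schedule at all.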

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D128941

Added: 
    llvm/test/CodeGen/Thumb2/swp-regpressure.mir

Modified: 
    llvm/include/llvm/CodeGen/RegisterPressure.h
    llvm/include/llvm/CodeGen/TargetInstrInfo.h
    llvm/lib/CodeGen/MachinePipeliner.cpp
    llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index c40c0eec80ecb..1164b60a11eb6 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -537,6 +537,11 @@ class RegPressureTracker {
 
   void dump() const;
 
+  void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
+                           LaneBitmask NewMask);
+  void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
+                           LaneBitmask NewMask);
+
 protected:
   /// Add Reg to the live out set and increase max pressure.
   void discoverLiveOut(RegisterMaskPair Pair);
@@ -547,11 +552,6 @@ class RegPressureTracker {
   /// after the current position.
   SlotIndex getCurrSlot() const;
 
-  void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
-                           LaneBitmask NewMask);
-  void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask,
-                           LaneBitmask NewMask);
-
   void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs);
 
   void bumpUpwardPressure(const MachineInstr *MI);
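
With increaseRegPressure/decreaseRegPressure public, code outside the tracker
can temporarily account for values it knows are live, which is what the ARM
heuristic further down does for loop-carried values.  A hedged sketch of the
pattern (the helper name and the single-register scope are illustrative only;
the ARM code batches all increases before the matching decreases so the
contributions of all cross-iteration values overlap in the recorded maximum):

    // Bump the tracked pressure for one value known to be live across
    // iterations, then release it again; the peak ends up in MaxSetPressure
    // when the increase is applied.
    void noteCrossIterationValue(llvm::RegPressureTracker &RPT,
                                 llvm::Register Reg) {
      RPT.increaseRegPressure(Reg, llvm::LaneBitmask::getNone(),
                              llvm::LaneBitmask::getAll());
      RPT.decreaseRegPressure(Reg, llvm::LaneBitmask::getAll(),
                              llvm::LaneBitmask::getNone());
    }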

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 72f69f4c6b779..65fba772c0c88 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -54,6 +54,8 @@ class ScheduleDAGMI;
 class ScheduleHazardRecognizer;
 class SDNode;
 class SelectionDAG;
+class SMSchedule;
+class SwingSchedulerDAG;
 class RegScavenger;
 class TargetRegisterClass;
 class TargetRegisterInfo;
@@ -729,6 +731,13 @@ class TargetInstrInfo : public MCInstrInfo {
     /// update with no users being pipelined.
     virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const = 0;
 
+    /// Return true if the proposed schedule should be used.  Otherwise return
+    /// false to not pipeline the loop. This function should be used to ensure
+    /// that pipelined loops meet target-specific quality heuristics.
+    virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) {
+      return true;
+    }
+
     /// Create a condition to determine if the trip count of the loop is greater
     /// than TC, where TC is always one more than for the previous prologue or
     /// 0 if this is being called for the outermost prologue.

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 52501ca7c8714..e561e0af20c95 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -2098,6 +2098,12 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
                     << " (II=" << Schedule.getInitiationInterval()
                     << ")\n");
 
+  if (scheduleFound) {
+    scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
+    if (!scheduleFound)
+      dbgs() << "Target rejected schedule\n";
+  }
+
   if (scheduleFound) {
     Schedule.finalizeSchedule(this);
     Pass.ORE->emit([&]() {

diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 92aa103112136..00fabb025cdcd 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -35,6 +36,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePipeliner.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/MultiHazardRecognizer.h"
@@ -6756,6 +6758,19 @@ class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
   MachineFunction *MF;
   const TargetInstrInfo *TII;
 
+  // Bitset[0 .. MAX_STAGES-1] ... iterations needed
+  //       [LAST_IS_USE] : last reference to register in schedule is a use
+  //       [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
+  static int constexpr MAX_STAGES = 30;
+  static int constexpr LAST_IS_USE = MAX_STAGES;
+  static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
+  typedef std::bitset<MAX_STAGES + 2> IterNeed;
+  typedef std::map<unsigned, IterNeed> IterNeeds;
+
+  void bumpCrossIterationPressure(RegPressureTracker &RPT,
+                                  const IterNeeds &CIN);
+  bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);
+
   // Meanings of the various stuff with loop types:
   // t2Bcc:
   //   EndLoop = branch at end of original BB that will become a kernel
@@ -6774,6 +6789,13 @@ class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
     return MI == EndLoop || MI == LoopCount;
   }
 
+  bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
+    if (tooMuchRegisterPressure(SSD, SMS))
+      return false;
+
+    return true;
+  }
+
   Optional<bool> createTripCountGreaterCondition(
       int TC, MachineBasicBlock &MBB,
       SmallVectorImpl<MachineOperand> &Cond) override {
@@ -6812,6 +6834,145 @@ class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
 
   void disposed() override {}
 };
+
+void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
+                                                      const IterNeeds &CIN) {
+  // Increase pressure by the amounts in CrossIterationNeeds
+  for (const auto &N : CIN) {
+    int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
+    for (int I = 0; I < Cnt; ++I)
+      RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
+                              LaneBitmask::getAll());
+  }
+  // Decrease pressure by the amounts in CrossIterationNeeds
+  for (const auto &N : CIN) {
+    int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
+    for (int I = 0; I < Cnt; ++I)
+      RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
+                              LaneBitmask::getNone());
+  }
+}
+
+bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
+                                                   SMSchedule &SMS) {
+  IterNeeds CrossIterationNeeds;
+
+  // Determine which values will be loop-carried after the schedule is
+  // applied
+
+  for (auto &SU : SSD.SUnits) {
+    const MachineInstr *MI = SU.getInstr();
+    int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
+    for (auto &S : SU.Succs)
+      if (MI->isPHI() && S.getKind() == SDep::Anti) {
+        Register Reg = S.getReg();
+        if (Register::isVirtualRegister(Reg))
+          CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
+              .first->second.set(0);
+      } else if (S.isAssignedRegDep()) {
+        int OStg = SMS.stageScheduled(S.getSUnit());
+        if (OStg >= 0 && OStg != Stg) {
+          Register Reg = S.getReg();
+          if (Register::isVirtualRegister(Reg))
+            CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
+                .first->second |= ((1 << (OStg - Stg)) - 1);
+        }
+      }
+  }
+
+  // Determine more-or-less what the proposed schedule (reversed) is going to
+  // be; it might not be quite the same because the within-cycle ordering
+  // created by SMSchedule depends upon changes to help with address offsets and
+  // the like.
+  std::vector<SUnit *> ProposedSchedule;
+  for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
+    for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
+         ++Stage) {
+      std::deque<SUnit *> Instrs =
+          SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
+      std::sort(Instrs.begin(), Instrs.end(),
+                [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
+      for (SUnit *SU : Instrs)
+        ProposedSchedule.push_back(SU);
+    }
+
+  // Learn whether the last use/def of each cross-iteration register is a use or
+  // def. If it is a def, RegisterPressure will implicitly increase max pressure
+  // and we do not have to add the pressure.
+  for (auto SU : ProposedSchedule)
+    for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
+         ++OperI) {
+      auto MO = *OperI;
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      Register Reg = MO.getReg();
+      auto CIter = CrossIterationNeeds.find(Reg.id());
+      if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
+          CIter->second[SEEN_AS_LIVE])
+        continue;
+      if (MO.isDef() && !MO.isDead())
+        CIter->second.set(SEEN_AS_LIVE);
+      else if (MO.isUse())
+        CIter->second.set(LAST_IS_USE);
+    }
+  for (auto &CI : CrossIterationNeeds)
+    CI.second.reset(LAST_IS_USE);
+
+  RegionPressure RecRegPressure;
+  RegPressureTracker RPTracker(RecRegPressure);
+  RegisterClassInfo RegClassInfo;
+  RegClassInfo.runOnMachineFunction(*MF);
+  RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
+                 EndLoop->getParent()->end(), false, false);
+  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+
+  bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
+
+  for (auto SU : ProposedSchedule) {
+    MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
+    RPTracker.setPos(std::next(CurInstI));
+    RPTracker.recede();
+
+    // Track what cross-iteration registers would be seen as live
+    for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
+      auto MO = *OperI;
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      Register Reg = MO.getReg();
+      if (MO.isDef() && !MO.isDead()) {
+        auto CIter = CrossIterationNeeds.find(Reg.id());
+        if (CIter != CrossIterationNeeds.end()) {
+          CIter->second.reset(0);
+          CIter->second.reset(SEEN_AS_LIVE);
+        }
+      }
+    }
+    for (auto &S : SU->Preds) {
+      auto Stg = SMS.stageScheduled(SU);
+      if (S.isAssignedRegDep()) {
+        Register Reg = S.getReg();
+        auto CIter = CrossIterationNeeds.find(Reg.id());
+        if (CIter != CrossIterationNeeds.end()) {
+          auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
+          assert(Stg2 <= Stg && "Data dependence upon earlier stage");
+          if (Stg - Stg2 < MAX_STAGES)
+            CIter->second.set(1u << (Stg - Stg2));
+          CIter->second.set(SEEN_AS_LIVE);
+        }
+      }
+    }
+
+    bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
+  }
+
+  auto &P = RPTracker.getPressure().MaxSetPressure;
+  for (unsigned I = 0, E = P.size(); I < E; ++I)
+    if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
+      return true;
+    }
+  return false;
+}
+
 } // namespace
 
 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
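
To make the cross-iteration bookkeeping above concrete, here is a small
self-contained illustration of the IterNeed encoding used by
tooMuchRegisterPressure and bumpCrossIterationPressure; the stage numbers are
hypothetical and the snippet is not part of the patch:

    #include <bitset>
    #include <cstdio>

    int main() {
      // Mirrors the constants in ARMPipelinerLoopInfo above.
      constexpr int MAX_STAGES = 30;
      constexpr int SEEN_AS_LIVE = MAX_STAGES + 1;
      std::bitset<MAX_STAGES + 2> Need;

      // A value defined in stage 0 and consumed in stage 2 must stay live
      // across two extra iterations: bits 0 and 1 get set, matching the
      // "(1 << (OStg - Stg)) - 1" update in tooMuchRegisterPressure.
      int Stg = 0, OStg = 2;
      Need |= (1u << (OStg - Stg)) - 1;

      // If the normal pressure algorithm already counts the value as live,
      // one copy is accounted for, so bumpCrossIterationPressure subtracts
      // the SEEN_AS_LIVE bit twice (once for the bit itself, once for the
      // copy the tracker already sees).
      Need.set(SEEN_AS_LIVE);
      int ExtraCopies = (int)Need.count() - (Need[SEEN_AS_LIVE] ? 2 : 0);
      std::printf("extra simulated live copies: %d\n", ExtraCopies); // prints 1
      return 0;
    }

Each such extra copy is one increaseRegPressure call made by
bumpCrossIterationPressure, which is how the pressure check sees loop-carried
values as live before any kernel has been emitted.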

diff --git a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
new file mode 100644
index 0000000000000..483b572ef06ff
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
@@ -0,0 +1,528 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
+
+# This test checks that too much register pressure will cause the modulo
+# schedule to be rejected and that a test with the same resource usage
+# but without register pressure is not rejected.
+
+--- |
+  define hidden float @high_pressure(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {
+  entry:
+    %cmp8 = icmp sgt i32 %sz, 0
+    br i1 %cmp8, label %for.body.preheader, label %for.end
+
+  for.body.preheader:                               ; preds = %entry
+    %scevgep = getelementptr float, float* %b, i32 -1
+    %scevgep4 = getelementptr float, float* %a, i32 -1
+    br label %for.body
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+    %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+    %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
+    %0 = load float, float* %scevgep7, align 4
+    %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1
+    %1 = load float, float* %scevgep3, align 4
+    %mul = fmul fast float %1, %0
+    %add = fadd fast float %mul, %sum.010
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+  for.end:                                          ; preds = %for.body, %entry
+    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+    ret float %sum.0.lcssa
+  }
+
+  !0 = distinct !{!0, !1, !2, !3}
+  !1 = !{!"llvm.loop.mustprogress"}
+  !2 = !{!"llvm.loop.unroll.disable"}
+  !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
+
+
+  define hidden float @low_pressure(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {
+  entry:
+    %cmp8 = icmp sgt i32 %sz, 0
+    br i1 %cmp8, label %for.body.preheader, label %for.end
+
+  for.body.preheader:                               ; preds = %entry
+    %scevgep = getelementptr float, float* %b, i32 -1
+    %scevgep4 = getelementptr float, float* %a, i32 -1
+    br label %for.body
+
+  for.body:                                         ; preds = %for.body.preheader, %for.body
+    %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
+    %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
+    %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+    %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+    %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
+    %0 = load float, float* %scevgep7, align 4
+    %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1
+    %1 = load float, float* %scevgep3, align 4
+    %mul = fmul fast float %1, %0
+    %add = fadd fast float %mul, %sum.010
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1
+    %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !4
+
+  for.end:                                          ; preds = %for.body, %entry
+    %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+    ret float %sum.0.lcssa
+  }
+
+  !4 = distinct !{!4, !5, !6, !7}
+  !5 = !{!"llvm.loop.mustprogress"}
+  !6 = !{!"llvm.loop.unroll.disable"}
+  !7 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
+
+...
+---
+name:            high_pressure
+alignment:       2
+tracksRegLiveness: true
+constants:
+  - id:              0
+    value:           'float 0.000000e+00'
+    alignment:       4
+    isTargetSpecific: false
+body:             |
+  ; CHECK-LABEL: name: high_pressure
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnopc = COPY $r2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gprnopc = COPY $r1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gprnopc = COPY $r0
+  ; CHECK-NEXT:   t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   t2Bcc %bb.2, 10 /* CC::ge */, $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.for.body.preheader:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr = COPY [[t2SUBri]]
+  ; CHECK-NEXT:   [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gpr = COPY [[t2SUBri1]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.for.body:
+  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.3(0x7c000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %10, %bb.3
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY3]], %bb.2, %12, %bb.3
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY]], %bb.2, %14, %bb.3
+  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.2, %16, %bb.3
+  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %18, %bb.3
+  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %20, %bb.3
+  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %22, %bb.3
+  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %24, %bb.3
+  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %26, %bb.3
+  ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %28, %bb.3
+  ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %30, %bb.3
+  ; CHECK-NEXT:   [[PHI11:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %32, %bb.3
+  ; CHECK-NEXT:   [[PHI12:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %34, %bb.3
+  ; CHECK-NEXT:   [[PHI13:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %36, %bb.3
+  ; CHECK-NEXT:   [[PHI14:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %38, %bb.3
+  ; CHECK-NEXT:   [[PHI15:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %40, %bb.3
+  ; CHECK-NEXT:   [[PHI16:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %42, %bb.3
+  ; CHECK-NEXT:   [[PHI17:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %44, %bb.3
+  ; CHECK-NEXT:   [[PHI18:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %46, %bb.3
+  ; CHECK-NEXT:   [[PHI19:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %48, %bb.3
+  ; CHECK-NEXT:   [[PHI20:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %50, %bb.3
+  ; CHECK-NEXT:   [[PHI21:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %52, %bb.3
+  ; CHECK-NEXT:   [[PHI22:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.2, %54, %bb.3
+  ; CHECK-NEXT:   [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
+  ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gpr = COPY [[t2SUBri2]]
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:gpr = COPY [[t2ADDri]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:rgpr = COPY [[PHI5]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:rgpr = COPY [[PHI6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:rgpr = COPY [[PHI7]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:rgpr = COPY [[PHI8]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:rgpr = COPY [[PHI9]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:rgpr = COPY [[PHI10]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:rgpr = COPY [[PHI11]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:rgpr = COPY [[PHI12]]
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:rgpr = COPY [[PHI13]]
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:rgpr = COPY [[PHI14]]
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:rgpr = COPY [[PHI15]]
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:rgpr = COPY [[PHI16]]
+  ; CHECK-NEXT:   [[COPY21:%[0-9]+]]:rgpr = COPY [[PHI17]]
+  ; CHECK-NEXT:   [[COPY22:%[0-9]+]]:rgpr = COPY [[PHI18]]
+  ; CHECK-NEXT:   [[COPY23:%[0-9]+]]:rgpr = COPY [[PHI19]]
+  ; CHECK-NEXT:   [[COPY24:%[0-9]+]]:rgpr = COPY [[PHI20]]
+  ; CHECK-NEXT:   [[COPY25:%[0-9]+]]:rgpr = COPY [[PHI21]]
+  ; CHECK-NEXT:   [[COPY26:%[0-9]+]]:rgpr = COPY [[PHI22]]
+  ; CHECK-NEXT:   t2Bcc %bb.3, 1 /* CC::ne */, $cpsr
+  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.for.end:
+  ; CHECK-NEXT:   [[PHI23:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS]], %bb.3
+  ; CHECK-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI23]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $r0 = COPY [[VMOVRS]]
+  ; CHECK-NEXT:   tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+    liveins: $r0, $r1, $r2
+
+    %13:gprnopc = COPY $r2
+    %12:gprnopc = COPY $r1
+    %11:gprnopc = COPY $r0
+    t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2Bcc %bb.1, 10 /* CC::ge */, $cpsr
+
+  bb.4:
+    successors: %bb.3(0x80000000)
+
+    %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.1.for.body.preheader:
+    successors: %bb.2(0x80000000)
+
+    %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg
+    %0:gpr = COPY %16
+    %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg
+    %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+    %1:gpr = COPY %17
+
+  bb.2.for.body:
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+
+    %2:gprnopc = PHI %1, %bb.1, %9, %bb.2
+    %3:gprnopc = PHI %0, %bb.1, %8, %bb.2
+    %4:gprnopc = PHI %13, %bb.1, %7, %bb.2
+    %5:spr = PHI %15, %bb.1, %6, %bb.2
+    %101:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %102:gprnopc = PHI %1, %bb.1, %202, %bb.2
+    %103:gprnopc = PHI %1, %bb.1, %203, %bb.2
+    %104:gprnopc = PHI %1, %bb.1, %204, %bb.2
+    %105:gprnopc = PHI %1, %bb.1, %205, %bb.2
+    %106:gprnopc = PHI %1, %bb.1, %206, %bb.2
+    %107:gprnopc = PHI %1, %bb.1, %207, %bb.2
+    %108:gprnopc = PHI %1, %bb.1, %208, %bb.2
+    %109:gprnopc = PHI %1, %bb.1, %209, %bb.2
+    %110:gprnopc = PHI %1, %bb.1, %210, %bb.2
+    %111:gprnopc = PHI %1, %bb.1, %211, %bb.2
+    %112:gprnopc = PHI %1, %bb.1, %212, %bb.2
+    %113:gprnopc = PHI %1, %bb.1, %213, %bb.2
+    %114:gprnopc = PHI %1, %bb.1, %214, %bb.2
+    %115:gprnopc = PHI %1, %bb.1, %215, %bb.2
+    %116:gprnopc = PHI %1, %bb.1, %216, %bb.2
+    %117:gprnopc = PHI %1, %bb.1, %217, %bb.2
+    %118:gprnopc = PHI %1, %bb.1, %218, %bb.2
+    %119:gprnopc = PHI %1, %bb.1, %219, %bb.2
+    %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg
+    %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
+    %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
+    %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
+    %7:gpr = COPY %23
+    %8:gpr = COPY %20
+    %9:gpr = COPY %18
+    %201:rgpr = COPY %101
+    %202:rgpr = COPY %102
+    %203:rgpr = COPY %103
+    %204:rgpr = COPY %104
+    %205:rgpr = COPY %105
+    %206:rgpr = COPY %106
+    %207:rgpr = COPY %107
+    %208:rgpr = COPY %108
+    %209:rgpr = COPY %109
+    %210:rgpr = COPY %110
+    %211:rgpr = COPY %111
+    %212:rgpr = COPY %112
+    %213:rgpr = COPY %113
+    %214:rgpr = COPY %114
+    %215:rgpr = COPY %115
+    %216:rgpr = COPY %116
+    %217:rgpr = COPY %117
+    %218:rgpr = COPY %118
+    %219:rgpr = COPY %119
+    t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.3.for.end:
+    %10:spr = PHI %14, %bb.4, %6, %bb.2
+    %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg
+    $r0 = COPY %24
+    tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+
+...
+---
+name:            low_pressure
+alignment:       2
+tracksRegLiveness: true
+constants:
+  - id:              0
+    value:           'float 0.000000e+00'
+    alignment:       4
+    isTargetSpecific: false
+body:             |
+  ; CHECK-LABEL: name: low_pressure
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x50000000), %bb.1(0x30000000)
+  ; CHECK-NEXT:   liveins: $r0, $r1, $r2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnopc = COPY $r2
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gprnopc = COPY $r1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gprnopc = COPY $r0
+  ; CHECK-NEXT:   t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+  ; CHECK-NEXT:   t2Bcc %bb.2, 10 /* CC::ge */, $cpsr
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.for.body.preheader:
+  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gprnopc = COPY [[t2SUBri]]
+  ; CHECK-NEXT:   [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gprnopc = COPY [[t2SUBri1]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5.for.body:
+  ; CHECK-NEXT:   successors: %bb.6(0x40000000), %bb.9(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %66:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %67:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %68:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %69:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %70:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %71:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %72:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %73:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %74:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %75:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %76:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %77:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %78:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %79:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %80:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %81:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %82:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   dead %83:rgpr = COPY [[COPY4]]
+  ; CHECK-NEXT:   t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
+  ; CHECK-NEXT:   t2B %bb.6, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6.for.body:
+  ; CHECK-NEXT:   successors: %bb.7(0x80000000), %bb.8(0x00000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY4]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
+  ; CHECK-NEXT:   [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
+  ; CHECK-NEXT:   [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:gpr = COPY [[t2ADDri]]
+  ; CHECK-NEXT:   [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %94:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %95:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %96:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %97:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %98:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %99:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %100:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %101:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %102:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %103:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %104:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %105:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %106:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %107:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %108:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %109:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %110:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   dead %111:rgpr = COPY [[COPY6]]
+  ; CHECK-NEXT:   t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
+  ; CHECK-NEXT:   t2B %bb.7, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7.for.body:
+  ; CHECK-NEXT:   successors: %bb.8(0x04000000), %bb.7(0x7c000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %116, %bb.7
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, %117, %bb.7
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %140, %bb.7
+  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %137, %bb.7
+  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %139, %bb.7
+  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, %118, %bb.7
+  ; CHECK-NEXT:   [[VLDRS4:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
+  ; CHECK-NEXT:   [[VLDRS5:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
+  ; CHECK-NEXT:   [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
+  ; CHECK-NEXT:   [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   dead %119:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %120:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %121:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %122:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %123:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %124:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %125:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %126:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %127:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %128:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %129:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %130:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %131:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %132:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %133:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %134:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %135:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   dead %136:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:rgpr = COPY [[PHI4]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
+  ; CHECK-NEXT:   t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
+  ; CHECK-NEXT:   t2B %bb.8, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8:
+  ; CHECK-NEXT:   successors: %bb.9(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, [[COPY11]], %bb.7
+  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, [[COPY12]], %bb.7
+  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
+  ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[VMULS1]], %bb.7
+  ; CHECK-NEXT:   [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI9]], [[PHI8]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.9:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:gprnopc = PHI [[COPY4]], %bb.5, [[PHI6]], %bb.8
+  ; CHECK-NEXT:   [[PHI11:%[0-9]+]]:gprnopc = PHI [[COPY3]], %bb.5, [[PHI7]], %bb.8
+  ; CHECK-NEXT:   [[PHI12:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
+  ; CHECK-NEXT:   [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI10]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
+  ; CHECK-NEXT:   [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI11]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
+  ; CHECK-NEXT:   [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS7]], [[VLDRS6]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI12]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   t2B %bb.4, 14 /* CC::al */, $noreg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.for.end:
+  ; CHECK-NEXT:   [[PHI13:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS2]], %bb.9
+  ; CHECK-NEXT:   [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI13]], 14 /* CC::al */, $noreg
+  ; CHECK-NEXT:   $r0 = COPY [[VMOVRS]]
+  ; CHECK-NEXT:   tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+  bb.0.entry:
+    successors: %bb.1(0x50000000), %bb.4(0x30000000)
+    liveins: $r0, $r1, $r2
+
+    %13:gprnopc = COPY $r2
+    %12:gprnopc = COPY $r1
+    %11:gprnopc = COPY $r0
+    t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
+    t2Bcc %bb.1, 10 /* CC::ge */, $cpsr
+
+  bb.4:
+    successors: %bb.3(0x80000000)
+
+    %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.1.for.body.preheader:
+    successors: %bb.2(0x80000000)
+
+    %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg
+    %0:gpr = COPY %16
+    %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg
+    %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
+    %1:gpr = COPY %17
+
+  bb.2.for.body:
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+
+    %2:gprnopc = PHI %1, %bb.1, %9, %bb.2
+    %3:gprnopc = PHI %0, %bb.1, %8, %bb.2
+    %4:gprnopc = PHI %13, %bb.1, %7, %bb.2
+    %5:spr = PHI %15, %bb.1, %6, %bb.2
+    %101:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %102:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %103:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %104:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %105:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %106:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %107:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %108:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %109:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %110:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %111:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %112:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %113:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %114:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %115:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %116:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %117:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %118:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %119:gprnopc = PHI %1, %bb.1, %201, %bb.2
+    %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg
+    %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
+    %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
+    %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
+    %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
+    %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+    %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
+    %7:gpr = COPY %23
+    %8:gpr = COPY %20
+    %9:gpr = COPY %18
+    %201:rgpr = COPY %101
+    %202:rgpr = COPY %101
+    %203:rgpr = COPY %101
+    %204:rgpr = COPY %101
+    %205:rgpr = COPY %101
+    %206:rgpr = COPY %101
+    %207:rgpr = COPY %101
+    %208:rgpr = COPY %101
+    %209:rgpr = COPY %101
+    %210:rgpr = COPY %101
+    %211:rgpr = COPY %101
+    %212:rgpr = COPY %101
+    %213:rgpr = COPY %101
+    %214:rgpr = COPY %101
+    %215:rgpr = COPY %101
+    %216:rgpr = COPY %101
+    %217:rgpr = COPY %101
+    %218:rgpr = COPY %101
+    %219:rgpr = COPY %101
+    t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
+    t2B %bb.3, 14 /* CC::al */, $noreg
+
+  bb.3.for.end:
+    %10:spr = PHI %14, %bb.4, %6, %bb.2
+    %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg
+    $r0 = COPY %24
+    tBX_RET 14 /* CC::al */, $noreg, implicit $r0
+
+...

