[llvm] acb7859 - [MachineSink] Extend loop sinking capability (#117247)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 23 17:08:27 PST 2025


Author: Jeffrey Byrnes
Date: 2025-01-23T17:08:23-08:00
New Revision: acb7859f075f91b1105c04c37c6aa85db27a898a

URL: https://github.com/llvm/llvm-project/commit/acb7859f075f91b1105c04c37c6aa85db27a898a
DIFF: https://github.com/llvm/llvm-project/commit/acb7859f075f91b1105c04c37c6aa85db27a898a.diff

LOG: [MachineSink] Extend loop sinking capability (#117247)

The current MIR cycle sinking capabilities are rather limited. It only
support sinking copies into a single successor block while obeying
limits.

This opt-in feature adds a more aggressive option, that is not limited
to the above concerns. The feature will try to "sink" by duplicating any
top-level preheader instruction (that we are sure is safe to sink) into
any user block, then does some dead code cleanup. In particular, this is
useful for high RP situations when loop bodies have control flow.

Added: 
    llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir

Modified: 
    llvm/lib/CodeGen/MachineSink.cpp
    llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
    llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
    llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 03d93cecaa5963..5cb5c0bf40a080 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -45,6 +45,7 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DebugInfoMetadata.h"
@@ -113,6 +114,8 @@ STATISTIC(NumSplit, "Number of critical edges split");
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
 
+using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+
 namespace {
 
 class MachineSinking : public MachineFunctionPass {
@@ -128,6 +131,7 @@ class MachineSinking : public MachineFunctionPass {
   const MachineBranchProbabilityInfo *MBPI = nullptr;
   AliasAnalysis *AA = nullptr;
   RegisterClassInfo RegClassInfo;
+  TargetSchedModel SchedModel;
 
   // Remember which edges have been considered for breaking.
   SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
@@ -161,6 +165,8 @@ class MachineSinking : public MachineFunctionPass {
   /// would re-order assignments.
   using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
 
+  using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;
+
   /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
   /// debug instructions to sink.
   SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
@@ -255,7 +261,10 @@ class MachineSinking : public MachineFunctionPass {
 
   void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                                SmallVectorImpl<MachineInstr *> &Candidates);
-  bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
+
+  bool
+  aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
+                            DenseMap<SinkItem, MachineInstr *> &SunkInstrs);
 
   bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                             MachineBasicBlock *MBB,
@@ -271,11 +280,14 @@ class MachineSinking : public MachineFunctionPass {
   GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                          AllSuccsCache &AllSuccessors) const;
 
-  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
+  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
+                                               bool UseCache = true);
 
   bool registerPressureSetExceedsLimit(unsigned NRegs,
                                        const TargetRegisterClass *RC,
                                        const MachineBasicBlock &MBB);
+
+  bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
 };
 
 } // end anonymous namespace
@@ -680,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates(
     SmallVectorImpl<MachineInstr *> &Candidates) {
   for (auto &MI : *BB) {
     LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
+    if (MI.isMetaInstruction()) {
+      LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n");
+      continue;
+    }
     if (!TII->shouldSink(MI)) {
       LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                            "target\n");
@@ -775,31 +791,62 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
 
   if (SinkInstsIntoCycle) {
     SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
-    for (auto *Cycle : Cycles) {
-      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
-      if (!Preheader) {
-        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
-        continue;
-      }
-      SmallVector<MachineInstr *, 8> Candidates;
-      FindCycleSinkCandidates(Cycle, Preheader, Candidates);
-
-      // Walk the candidates in reverse order so that we start with the use
-      // of a def-use chain, if there is any.
-      // TODO: Sort the candidates using a cost-model.
-      unsigned i = 0;
-      for (MachineInstr *I : llvm::reverse(Candidates)) {
-        if (i++ == SinkIntoCycleLimit) {
-          LLVM_DEBUG(dbgs() << "CycleSink:   Limit reached of instructions to "
-                               "be analysed.");
-          break;
+    SchedModel.init(STI);
+    bool HasHighPressure;
+
+    DenseMap<SinkItem, MachineInstr *> SunkInstrs;
+
+    enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
+    for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
+         ++Stage, SunkInstrs.clear()) {
+      HasHighPressure = false;
+
+      for (auto *Cycle : Cycles) {
+        MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+        if (!Preheader) {
+          LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
+          continue;
         }
+        SmallVector<MachineInstr *, 8> Candidates;
+        FindCycleSinkCandidates(Cycle, Preheader, Candidates);
+
+        unsigned i = 0;
+
+        // Walk the candidates in reverse order so that we start with the use
+        // of a def-use chain, if there is any.
+        // TODO: Sort the candidates using a cost-model.
+        for (MachineInstr *I : llvm::reverse(Candidates)) {
+          // CycleSinkStage::COPY: Sink a limited number of copies
+          if (Stage == CycleSinkStage::COPY) {
+            if (i++ == SinkIntoCycleLimit) {
+              LLVM_DEBUG(dbgs()
+                         << "CycleSink:   Limit reached of instructions to "
+                            "be analyzed.");
+              break;
+            }
+
+            if (!I->isCopy())
+              continue;
+          }
 
-        if (!SinkIntoCycle(Cycle, *I))
-          break;
-        EverMadeChange = true;
-        ++NumCycleSunk;
+          // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
+          // which the target specifies as low-latency
+          if (Stage == CycleSinkStage::LOW_LATENCY &&
+              !TII->hasLowDefLatency(SchedModel, *I, 0))
+            continue;
+
+          if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
+            break;
+          EverMadeChange = true;
+          ++NumCycleSunk;
+        }
+
+        // Recalculate the pressure after sinking
+        if (!HasHighPressure)
+          HasHighPressure = registerPressureExceedsLimit(*Preheader);
       }
+      if (!HasHighPressure)
+        break;
     }
   }
 
@@ -1055,13 +1102,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
 }
 
 std::vector<unsigned> &
-MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
+MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
+                                      bool UseCache) {
   // Currently to save compiling time, MBB's register pressure will not change
   // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
   // register pressure is changed after sinking any instructions into it.
   // FIXME: need a accurate and cheap register pressure estiminate model here.
+
   auto RP = CachedRegisterPressure.find(&MBB);
-  if (RP != CachedRegisterPressure.end())
+  if (UseCache && RP != CachedRegisterPressure.end())
     return RP->second;
 
   RegionPressure Pressure;
@@ -1085,6 +1134,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
   }
 
   RPTracker.closeRegion();
+
+  if (RP != CachedRegisterPressure.end()) {
+    CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
+    return CachedRegisterPressure[&MBB];
+  }
+
   auto It = CachedRegisterPressure.insert(
       std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
   return It.first->second;
@@ -1103,6 +1158,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
   return false;
 }
 
+// Recalculate RP and check if any pressure set exceeds the set limit.
+bool MachineSinking::registerPressureExceedsLimit(
+    const MachineBasicBlock &MBB) {
+  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false);
+
+  for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
+    if (BBRegisterPressure[PS] >=
+        TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 /// isProfitableToSinkTo - Return true if it is profitable to sink MI.
 bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                           MachineBasicBlock *MBB,
@@ -1581,83 +1651,98 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
   return HasAliasedStore;
 }
 
-/// Sink instructions into cycles if profitable. This especially tries to
-/// prevent register spills caused by register pressure if there is little to no
-/// overhead moving instructions into cycles.
-bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
-  LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
+/// Aggressively sink instructions into cycles. This will aggressively try to
+/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
+/// In particular, it will sink into multiple successor blocks without limits
+/// based on the amount of sinking, or the type of ops being sunk (so long as
+/// they are safe to sink).
+bool MachineSinking::aggressivelySinkIntoCycle(
+    MachineCycle *Cycle, MachineInstr &I,
+    DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
+  // TODO: support instructions with multiple defs
+  if (I.getNumDefs() > 1)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
   MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
   assert(Preheader && "Cycle sink needs a preheader block");
-  MachineBasicBlock *SinkBlock = nullptr;
-  bool CanSink = true;
-  const MachineOperand &MO = I.getOperand(0);
-
-  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
-    LLVM_DEBUG(dbgs() << "CycleSink:   Analysing use: " << MI);
-    if (!Cycle->contains(MI.getParent())) {
-      LLVM_DEBUG(dbgs() << "CycleSink:   Use not in cycle, can't sink.\n");
-      CanSink = false;
-      break;
-    }
+  SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;
 
-    // FIXME: Come up with a proper cost model that estimates whether sinking
-    // the instruction (and thus possibly executing it on every cycle
-    // iteration) is more expensive than a register.
-    // For now assumes that copies are cheap and thus almost always worth it.
-    if (!MI.isCopy()) {
-      LLVM_DEBUG(dbgs() << "CycleSink:   Use is not a copy\n");
-      CanSink = false;
-      break;
+  MachineOperand &DefMO = I.getOperand(0);
+  for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) {
+    Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI});
+  }
+
+  for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
+    MachineInstr *MI = Entry.second;
+    LLVM_DEBUG(dbgs() << "AggressiveCycleSink:   Analysing use: " << MI);
+    if (MI->isPHI()) {
+      LLVM_DEBUG(
+          dbgs() << "AggressiveCycleSink:   Not attempting to sink for PHI.\n");
+      continue;
     }
-    if (!SinkBlock) {
-      SinkBlock = MI.getParent();
-      LLVM_DEBUG(dbgs() << "CycleSink:   Setting sink block to: "
-                        << printMBBReference(*SinkBlock) << "\n");
+    // We cannot sink before the prologue
+    if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) {
+      LLVM_DEBUG(dbgs() << "AggressiveCycleSink:   Use is BasicBlock prologue, "
+                           "can't sink.\n");
       continue;
     }
-    SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
-    if (!SinkBlock) {
-      LLVM_DEBUG(dbgs() << "CycleSink:   Can't find nearest dominator\n");
-      CanSink = false;
-      break;
+    if (!Cycle->contains(MI->getParent())) {
+      LLVM_DEBUG(
+          dbgs() << "AggressiveCycleSink:   Use not in cycle, can't sink.\n");
+      continue;
     }
-    LLVM_DEBUG(dbgs() << "CycleSink:   Setting nearest common dom block: "
-                      << printMBBReference(*SinkBlock) << "\n");
-  }
 
-  if (!CanSink) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
-    return false;
-  }
-  if (!SinkBlock) {
-    LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
-    return false;
-  }
-  if (SinkBlock == Preheader) {
-    LLVM_DEBUG(
-        dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
-    return false;
-  }
-  if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
-    LLVM_DEBUG(
-        dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
-    return false;
-  }
+    MachineBasicBlock *SinkBlock = MI->getParent();
+    MachineInstr *NewMI = nullptr;
+    SinkItem MapEntry(&I, SinkBlock);
+
+    auto SI = SunkInstrs.find(MapEntry);
+
+    // Check for the case in which we have already sunk a copy of this
+    // instruction into the user block.
+    if (SI != SunkInstrs.end()) {
+      LLVM_DEBUG(dbgs() << "AggressiveCycleSink:   Already sunk to block: "
+                        << printMBBReference(*SinkBlock) << "\n");
+      NewMI = SI->second;
+    }
 
-  LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
-  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
-                    I);
+    // Create a copy of the instruction in the use block.
+    if (!NewMI) {
+      LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
+                        << printMBBReference(*SinkBlock) << "\n");
+
+      NewMI = I.getMF()->CloneMachineInstr(&I);
+      if (DefMO.getReg().isVirtual()) {
+        const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg());
+        Register DestReg = MRI->createVirtualRegister(TRC);
+        NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(),
+                                  *TRI);
+      }
+      SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
+                        NewMI);
+      SunkInstrs.insert({MapEntry, NewMI});
+    }
 
-  // Conservatively clear any kill flags on uses of sunk instruction
-  for (MachineOperand &MO : I.operands()) {
-    if (MO.isReg() && MO.readsReg())
+    // Conservatively clear any kill flags on uses of sunk instruction
+    for (MachineOperand &MO : NewMI->all_uses()) {
+      assert(MO.isReg() && MO.isUse());
       RegsToClearKillFlags.insert(MO.getReg());
-  }
+    }
 
-  // The instruction is moved from its basic block, so do not retain the
-  // debug information.
-  assert(!I.isDebugInstr() && "Should not sink debug inst");
-  I.setDebugLoc(DebugLoc());
+    // The instruction is moved from its basic block, so do not retain the
+    // debug information.
+    assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
+    NewMI->setDebugLoc(DebugLoc());
+
+    // Replace the use with the newly created virtual register.
+    RegSubRegPair &UseReg = Entry.first;
+    MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(),
+                           UseReg.SubReg, *TRI);
+  }
+  // If we have replaced all uses, then delete the dead instruction
+  if (I.isDead(*MRI))
+    I.eraseFromParent();
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir
new file mode 100644
index 00000000000000..bca1517ed183ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir
@@ -0,0 +1,1272 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 --sink-insts-to-avoid-spills=1 --stop-after=machine-sink -o -  %s | FileCheck -check-prefixes=GFX9-SUNK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 --sink-insts-to-avoid-spills=1 -mattr=+wavefrontsize64 --stop-after=machine-sink -o -  %s | FileCheck -check-prefixes=GFX10-SUNK %s
+
+---
+name:            test_sink_copy
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; GFX9-SUNK-LABEL: name: test_sink_copy
+  ; GFX9-SUNK: bb.0:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.1:
+  ; GFX9-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.2:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.3:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.4:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.5:
+  ; GFX9-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.6.entry:
+  ;
+  ; GFX10-SUNK-LABEL: name: test_sink_copy
+  ; GFX10-SUNK: bb.0:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.1:
+  ; GFX10-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.2:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.3:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.4:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.5:
+  ; GFX10-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.6.entry:
+  bb.0:
+    %0:vreg_256 = IMPLICIT_DEF
+    %1:vreg_256 = IMPLICIT_DEF
+    %2:vreg_256 = IMPLICIT_DEF
+    %3:vreg_256 = IMPLICIT_DEF
+    %4:vreg_256 = IMPLICIT_DEF
+    %5:vreg_256 = COPY %4
+    %6:vreg_256 = COPY %4
+    %7:vreg_256 = COPY %4
+    %8:vreg_256 = COPY %4
+    %9:vreg_256 = COPY %4
+
+
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+    S_BRANCH %bb.4
+
+  bb.3:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+    S_BRANCH %bb.5
+
+  bb.5:
+    S_ENDPGM 0
+...
+
+# For gfx9, after sinking the copies, pressure is within the desired limit
+
+---
+name:            test_sink_multi_stage
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; GFX9-SUNK-LABEL: name: test_sink_multi_stage
+  ; GFX9-SUNK: bb.0:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.1:
+  ; GFX9-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.2:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.3:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.4:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.5:
+  ; GFX9-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.6.entry:
+  ;
+  ; GFX10-SUNK-LABEL: name: test_sink_multi_stage
+  ; GFX10-SUNK: bb.0:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.1:
+  ; GFX10-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.2:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.3:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.4:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.5:
+  ; GFX10-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.6.entry:
+  bb.0:
+    %0:vreg_256 = IMPLICIT_DEF
+    %1:vreg_256 = IMPLICIT_DEF
+    %2:vreg_256 = IMPLICIT_DEF
+    %3:vreg_256 = IMPLICIT_DEF
+    %4:vreg_256 = IMPLICIT_DEF
+    %5:vreg_256 = COPY %4
+    %6:vreg_256 = COPY %4
+    %7:vreg_256 = COPY %4
+    %8:vreg_256 = COPY %4
+    %9:vreg_256 = COPY %4
+    %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+    INLINEASM &"", 1, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+    S_BRANCH %bb.4
+
+  bb.3:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+    INLINEASM &"", 1, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+    S_BRANCH %bb.5
+
+  bb.5:
+    S_ENDPGM 0
+...
+
+---
+name:            test_sink_low_rp
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; GFX9-SUNK-LABEL: name: test_sink_low_rp
+  ; GFX9-SUNK: bb.0:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.1:
+  ; GFX9-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.2:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.3:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.4:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.5:
+  ; GFX9-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.6.entry:
+  ;
+  ; GFX10-SUNK-LABEL: name: test_sink_low_rp
+  ; GFX10-SUNK: bb.0:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.1:
+  ; GFX10-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.2:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.3:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.4:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.5:
+  ; GFX10-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.6.entry:
+  bb.0:
+    %0:vreg_256 = IMPLICIT_DEF
+    %1:vreg_256 = IMPLICIT_DEF
+    %2:vreg_256 = IMPLICIT_DEF
+    %3:vreg_256 = IMPLICIT_DEF
+    %4:vreg_256 = IMPLICIT_DEF
+    %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25
+    S_BRANCH %bb.4
+
+  bb.3:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+    S_BRANCH %bb.5
+
+  bb.5:
+    S_ENDPGM 0
+...
+
+---
+name:            test_sink_high_rp
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; GFX9-SUNK-LABEL: name: test_sink_high_rp
+  ; GFX9-SUNK: bb.0:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.1:
+  ; GFX9-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.2:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.3:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.4:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.5:
+  ; GFX9-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.6.entry:
+  ;
+  ; GFX10-SUNK-LABEL: name: test_sink_high_rp
+  ; GFX10-SUNK: bb.0:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.1:
+  ; GFX10-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.2:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.3:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.4:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.5:
+  ; GFX10-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.6.entry:
+  bb.0:
+    %0:vreg_256 = IMPLICIT_DEF
+    %1:vreg_256 = IMPLICIT_DEF
+    %2:vreg_256 = IMPLICIT_DEF
+    %3:vreg_256 = IMPLICIT_DEF
+    %4:vreg_256 = IMPLICIT_DEF
+    %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+    S_BRANCH %bb.4
+
+  bb.3:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+    S_BRANCH %bb.5
+
+  bb.5:
+    S_ENDPGM 0
+...
+
+# Do not sink convergent op (MFMA)
+
+---
+name:            test_sink_convergent
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; GFX9-SUNK-LABEL: name: test_sink_convergent
+  ; GFX9-SUNK: bb.0:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF6:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF5]], [[DEF7]], [[DEF6]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.1:
+  ; GFX9-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF8]], implicit [[V_MFMA_F32_4X4X1F32_e64_]]
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.2:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.3:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.4:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.5:
+  ; GFX9-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.6.entry:
+  ;
+  ; GFX10-SUNK-LABEL: name: test_sink_convergent
+  ; GFX10-SUNK: bb.0:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF6:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF5]], [[DEF7]], [[DEF6]], 0, 0, 0, implicit $mode, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.1:
+  ; GFX10-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF8]], implicit [[V_MFMA_F32_4X4X1F32_e64_]]
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.2:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.3:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.4:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.5:
+  ; GFX10-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.6.entry:
+  bb.0:
+    %0:vreg_256 = IMPLICIT_DEF
+    %1:vreg_256 = IMPLICIT_DEF
+    %2:vreg_256 = IMPLICIT_DEF
+    %3:vreg_256 = IMPLICIT_DEF
+    %4:vreg_256 = IMPLICIT_DEF
+    %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %40:vgpr_32 = IMPLICIT_DEF
+    %41:areg_128_align2 = IMPLICIT_DEF
+    %42:vgpr_32 = IMPLICIT_DEF
+    %43:vgpr_32 = IMPLICIT_DEF
+    %44:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %40, %42, %41, 0, 0, 0, implicit $mode, implicit $exec
+
+
+    S_BRANCH %bb.1
+
+  bb.1:
+    INLINEASM &"", 1, implicit %43, implicit %44
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+    S_BRANCH %bb.4
+
+  bb.3:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+    S_BRANCH %bb.5
+
+  bb.5:
+    S_ENDPGM 0
+...
+
+# Do not sink instructions with multiple defs
+
+---
+name:            test_sink_multi_def
+alignment:       1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body:             |
+  ; GFX9-SUNK-LABEL: name: test_sink_multi_def
+  ; GFX9-SUNK: bb.0:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.1:
+  ; GFX9-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]]
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.2:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.3:
+  ; GFX9-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX9-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.4:
+  ; GFX9-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX9-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.5:
+  ; GFX9-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX9-SUNK-NEXT: {{  $}}
+  ; GFX9-SUNK-NEXT: bb.6.entry:
+  ;
+  ; GFX10-SUNK-LABEL: name: test_sink_multi_def
+  ; GFX10-SUNK: bb.0:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.1
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.1:
+  ; GFX10-SUNK-NEXT:   successors: %bb.2(0x40000000), %bb.3(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]]
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.3
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.2:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.3:
+  ; GFX10-SUNK-NEXT:   successors: %bb.4(0x80000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+  ; GFX10-SUNK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.4
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.4:
+  ; GFX10-SUNK-NEXT:   successors: %bb.1(0x40000000), %bb.5(0x40000000)
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+  ; GFX10-SUNK-NEXT:   S_BRANCH %bb.5
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.5:
+  ; GFX10-SUNK-NEXT:   S_ENDPGM 0
+  ; GFX10-SUNK-NEXT: {{  $}}
+  ; GFX10-SUNK-NEXT: bb.6.entry:
+  bb.0:
+    %0:vreg_256 = IMPLICIT_DEF
+    %1:vreg_256 = IMPLICIT_DEF
+    %2:vreg_256 = IMPLICIT_DEF
+    %3:vreg_256 = IMPLICIT_DEF
+    %4:vreg_256 = IMPLICIT_DEF
+    %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+    %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+    %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+    %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+    %32:vgpr_32, %33:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub2:vreg_256, %1.sub4:vreg_256, 0, implicit $exec
+
+    S_BRANCH %bb.1
+
+  bb.1:
+    INLINEASM &"", 1, implicit %32, implicit %33
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+    S_BRANCH %bb.4
+
+  bb.3:
+    INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+    INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+    S_BRANCH %bb.5
+
+  bb.5:
+    S_ENDPGM 0
+...
+

diff  --git a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
index efa21052e3ae2f..0fc31ea9d64379 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
@@ -1,5 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink --sink-insts-to-avoid-spills=1 -o - %s | FileCheck -check-prefixes=GFX9 %s
+
 
 ---
 name:            test_sink_fmac_to_only_use

diff  --git a/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
index 04c80582f6f079..ef6771278b06f3 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
@@ -1,78 +1,79 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o -  %s | FileCheck -check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink --sink-insts-to-avoid-spills=1 -o -  %s | FileCheck -check-prefixes=GFX10 %s
 
 ---
 name: multi_else_break
 tracksRegLiveness: true
 body: |
-  ; CHECK-LABEL: name: multi_else_break
-  ; CHECK: bb.0:
-  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
-  ; CHECK-NEXT:   liveins: $vgpr4, $vgpr5
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.1:
-  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %9, %bb.6
-  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, %11, %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
-  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.5(0x40000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]], %bb.1, %13, %bb.5
-  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, %15, %bb.5
-  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.1, %17, %bb.5
-  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %19, %bb.5
-  ; CHECK-NEXT:   [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PHI5]], [[COPY1]], implicit $exec
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
-  ; CHECK-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], $exec_lo, implicit-def $scc
-  ; CHECK-NEXT:   [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], $exec_lo, implicit-def $scc
-  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   S_BRANCH %bb.4
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   SI_END_CF %9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   S_ENDPGM 0
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4:
-  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI5]], [[S_MOV_B32_1]], 0, implicit $exec
-  ; CHECK-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY]], [[V_ADD_U32_e64_]], implicit $exec
-  ; CHECK-NEXT:   [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]]
-  ; CHECK-NEXT:   [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_1]], $exec_lo, implicit-def $scc
-  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], $exec_lo, implicit-def $scc
-  ; CHECK-NEXT:   [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_]], implicit-def $scc
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.5:
-  ; CHECK-NEXT:   successors: %bb.6(0x04000000), %bb.2(0x7c000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_2]], %bb.4
-  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[COPY4]], %bb.4
-  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.2, [[V_ADD_U32_e64_]], %bb.4
-  ; CHECK-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI6]], [[PHI4]], implicit-def dead $scc
-  ; CHECK-NEXT:   SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   S_BRANCH %bb.6
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.6:
-  ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:vgpr_32 = PHI [[PHI8]], %bb.5
-  ; CHECK-NEXT:   SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   [[SI_IF_BREAK1:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI7]], [[PHI]], implicit-def dead $scc
-  ; CHECK-NEXT:   SI_LOOP [[SI_IF_BREAK1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; GFX10-LABEL: name: multi_else_break
+  ; GFX10: bb.0:
+  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-NEXT:   liveins: $vgpr4, $vgpr5
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+  ; GFX10-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+  ; GFX10-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GFX10-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+  ; GFX10-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; GFX10-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; GFX10-NEXT:   [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; GFX10-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT:   successors: %bb.2(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %9, %bb.6
+  ; GFX10-NEXT:   [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, %11, %bb.6
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT:   successors: %bb.4(0x40000000), %bb.5(0x40000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]], %bb.1, %13, %bb.5
+  ; GFX10-NEXT:   [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, %15, %bb.5
+  ; GFX10-NEXT:   [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.1, %17, %bb.5
+  ; GFX10-NEXT:   [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %19, %bb.5
+  ; GFX10-NEXT:   [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PHI5]], [[COPY1]], implicit $exec
+  ; GFX10-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
+  ; GFX10-NEXT:   [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], $exec_lo, implicit-def $scc
+  ; GFX10-NEXT:   [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], $exec_lo, implicit-def $scc
+  ; GFX10-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.4
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.3:
+  ; GFX10-NEXT:   SI_END_CF %9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT:   S_ENDPGM 0
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.4:
+  ; GFX10-NEXT:   successors: %bb.5(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI5]], [[S_MOV_B32_1]], 0, implicit $exec
+  ; GFX10-NEXT:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY]], [[V_ADD_U32_e64_]], implicit $exec
+  ; GFX10-NEXT:   [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc
+  ; GFX10-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]]
+  ; GFX10-NEXT:   [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_1]], $exec_lo, implicit-def $scc
+  ; GFX10-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], $exec_lo, implicit-def $scc
+  ; GFX10-NEXT:   [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_]], implicit-def $scc
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.5:
+  ; GFX10-NEXT:   successors: %bb.6(0x04000000), %bb.2(0x7c000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_2]], %bb.4
+  ; GFX10-NEXT:   [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[COPY4]], %bb.4
+  ; GFX10-NEXT:   [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.2, [[V_ADD_U32_e64_]], %bb.4
+  ; GFX10-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI6]], [[PHI4]], implicit-def dead $scc
+  ; GFX10-NEXT:   SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.6
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.6:
+  ; GFX10-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   [[PHI9:%[0-9]+]]:vgpr_32 = PHI [[PHI8]], %bb.5
+  ; GFX10-NEXT:   SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT:   [[SI_IF_BREAK1:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI7]], [[PHI]], implicit-def dead $scc
+  ; GFX10-NEXT:   SI_LOOP [[SI_IF_BREAK1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.3
   bb.0:
     successors: %bb.1(0x80000000)
     liveins: $vgpr4, $vgpr5

diff  --git a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir
index 43c286a830b42e..f23afe52f97de8 100644
--- a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir
+++ b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir
@@ -25,23 +25,24 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[LARL:%[0-9]+]]:addr64bit = LARL @b
+  ; CHECK-NEXT:   [[LA:%[0-9]+]]:gr64bit = LA killed [[LARL]], 49, $noreg
+  ; CHECK-NEXT:   [[LGHI:%[0-9]+]]:gr64bit = LGHI 7
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:gr64bit = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:gr64bit = IMPLICIT_DEF
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[LA:%[0-9]+]]:gr64bit = LA [[LARL]], 49, $noreg
-  ; CHECK-NEXT:   [[LGHI:%[0-9]+]]:gr64bit = LGHI 7
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:gr64bit = IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:gr64bit = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64bit = COPY [[LA]]
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0
   ; CHECK-NEXT:   $r2d = COPY [[DEF]]
-  ; CHECK-NEXT:   $r3d = COPY [[LA]]
+  ; CHECK-NEXT:   $r3d = COPY [[COPY]]
   ; CHECK-NEXT:   $r4d = COPY [[LGHI]]
   ; CHECK-NEXT:   CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0
   ; CHECK-NEXT:   $r2d = COPY [[DEF1]]
-  ; CHECK-NEXT:   $r3d = COPY [[LA]]
+  ; CHECK-NEXT:   $r3d = COPY [[COPY]]
   ; CHECK-NEXT:   $r4d = COPY [[LGHI]]
   ; CHECK-NEXT:   CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0
@@ -54,19 +55,20 @@ body:             |
     %2:gr64bit = LGHI 7
     %3:gr64bit = IMPLICIT_DEF
     %5:gr64bit = IMPLICIT_DEF
+    %6:gr64bit = COPY killed %0
 
   bb.1:
     successors: %bb.1(0x80000000)
 
     ADJCALLSTACKDOWN 0, 0
     $r2d = COPY %3
-    $r3d = COPY %0
+    $r3d = COPY %6
     $r4d = COPY %2
     CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
     ADJCALLSTACKUP 0, 0
     ADJCALLSTACKDOWN 0, 0
     $r2d = COPY %5
-    $r3d = COPY %0
+    $r3d = COPY %6
     $r4d = COPY %2
     CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
     ADJCALLSTACKUP 0, 0


        


More information about the llvm-commits mailing list