[llvm] acb7859 - [MachineSink] Extend loop sinking capability (#117247)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 17:08:27 PST 2025
Author: Jeffrey Byrnes
Date: 2025-01-23T17:08:23-08:00
New Revision: acb7859f075f91b1105c04c37c6aa85db27a898a
URL: https://github.com/llvm/llvm-project/commit/acb7859f075f91b1105c04c37c6aa85db27a898a
DIFF: https://github.com/llvm/llvm-project/commit/acb7859f075f91b1105c04c37c6aa85db27a898a.diff
LOG: [MachineSink] Extend loop sinking capability (#117247)
The current MIR cycle sinking capabilities are rather limited. They only
support sinking copies into a single successor block, subject to a limit
on the number of candidates analysed.
This opt-in feature adds a more aggressive option that is not subject to
those restrictions. It tries to "sink" any top-level preheader instruction
that is known to be safe to sink by duplicating it into each user block,
then performs dead code cleanup on the originals. In particular, this is
useful in high register pressure (RP) situations where loop bodies contain
control flow.
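For readers skimming the diff, here is a condensed paraphrase of the new
staged driver added to runOnMachineFunction (the helpers, members and stage
names are the ones introduced in the diff below; the debug output, the
per-stage copy limit and statistics updates are elided, so treat this as a
sketch of the control flow rather than the exact code):

  enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
  DenseMap<SinkItem, MachineInstr *> SunkInstrs;
  for (unsigned Stage = COPY; Stage != END; ++Stage, SunkInstrs.clear()) {
    bool HasHighPressure = false;
    for (MachineCycle *Cycle : Cycles) {
      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
      if (!Preheader)
        continue;
      SmallVector<MachineInstr *, 8> Candidates;
      FindCycleSinkCandidates(Cycle, Preheader, Candidates);
      for (MachineInstr *I : llvm::reverse(Candidates)) {
        // Stage COPY: only copies (and only up to SinkIntoCycleLimit,
        // omitted here); stage LOW_LATENCY: only instructions the target
        // reports as low-latency; stage AGGRESSIVE: everything remaining.
        if (Stage == COPY && !I->isCopy())
          continue;
        if (Stage == LOW_LATENCY &&
            !TII->hasLowDefLatency(SchedModel, *I, 0))
          continue;
        if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
          break;
      }
      // Recompute preheader pressure after sinking into this cycle.
      if (!HasHighPressure)
        HasHighPressure = registerPressureExceedsLimit(*Preheader);
    }
    // Stop once no pressure set in any preheader exceeds its limit.
    if (!HasHighPressure)
      break;
  }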
Added:
llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir
Modified:
llvm/lib/CodeGen/MachineSink.cpp
llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 03d93cecaa5963..5cb5c0bf40a080 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -113,6 +114,8 @@ STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
+using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
+
namespace {
class MachineSinking : public MachineFunctionPass {
@@ -128,6 +131,7 @@ class MachineSinking : public MachineFunctionPass {
const MachineBranchProbabilityInfo *MBPI = nullptr;
AliasAnalysis *AA = nullptr;
RegisterClassInfo RegClassInfo;
+ TargetSchedModel SchedModel;
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
@@ -161,6 +165,8 @@ class MachineSinking : public MachineFunctionPass {
/// would re-order assignments.
using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
+ using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;
+
/// Record of DBG_VALUE uses of vregs in a block, so that we can identify
/// debug instructions to sink.
SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
@@ -255,7 +261,10 @@ class MachineSinking : public MachineFunctionPass {
void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
SmallVectorImpl<MachineInstr *> &Candidates);
- bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
+
+ bool
+ aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
+ DenseMap<SinkItem, MachineInstr *> &SunkInstrs);
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -271,11 +280,14 @@ class MachineSinking : public MachineFunctionPass {
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
- std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
+ std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
+ bool UseCache = true);
bool registerPressureSetExceedsLimit(unsigned NRegs,
const TargetRegisterClass *RC,
const MachineBasicBlock &MBB);
+
+ bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
};
} // end anonymous namespace
@@ -680,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates(
SmallVectorImpl<MachineInstr *> &Candidates) {
for (auto &MI : *BB) {
LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
+ if (MI.isMetaInstruction()) {
+ LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n");
+ continue;
+ }
if (!TII->shouldSink(MI)) {
LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
"target\n");
@@ -775,31 +791,62 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
if (SinkInstsIntoCycle) {
SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
- for (auto *Cycle : Cycles) {
- MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
- if (!Preheader) {
- LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
- continue;
- }
- SmallVector<MachineInstr *, 8> Candidates;
- FindCycleSinkCandidates(Cycle, Preheader, Candidates);
-
- // Walk the candidates in reverse order so that we start with the use
- // of a def-use chain, if there is any.
- // TODO: Sort the candidates using a cost-model.
- unsigned i = 0;
- for (MachineInstr *I : llvm::reverse(Candidates)) {
- if (i++ == SinkIntoCycleLimit) {
- LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
- "be analysed.");
- break;
+ SchedModel.init(STI);
+ bool HasHighPressure;
+
+ DenseMap<SinkItem, MachineInstr *> SunkInstrs;
+
+ enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
+ for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
+ ++Stage, SunkInstrs.clear()) {
+ HasHighPressure = false;
+
+ for (auto *Cycle : Cycles) {
+ MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
+ if (!Preheader) {
+ LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
+ continue;
}
+ SmallVector<MachineInstr *, 8> Candidates;
+ FindCycleSinkCandidates(Cycle, Preheader, Candidates);
+
+ unsigned i = 0;
+
+ // Walk the candidates in reverse order so that we start with the use
+ // of a def-use chain, if there is any.
+ // TODO: Sort the candidates using a cost-model.
+ for (MachineInstr *I : llvm::reverse(Candidates)) {
+ // CycleSinkStage::COPY: Sink a limited number of copies
+ if (Stage == CycleSinkStage::COPY) {
+ if (i++ == SinkIntoCycleLimit) {
+ LLVM_DEBUG(dbgs()
+ << "CycleSink: Limit reached of instructions to "
+ "be analyzed.");
+ break;
+ }
+
+ if (!I->isCopy())
+ continue;
+ }
- if (!SinkIntoCycle(Cycle, *I))
- break;
- EverMadeChange = true;
- ++NumCycleSunk;
+ // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
+ // which the target specifies as low-latency
+ if (Stage == CycleSinkStage::LOW_LATENCY &&
+ !TII->hasLowDefLatency(SchedModel, *I, 0))
+ continue;
+
+ if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
+ break;
+ EverMadeChange = true;
+ ++NumCycleSunk;
+ }
+
+ // Recalculate the pressure after sinking
+ if (!HasHighPressure)
+ HasHighPressure = registerPressureExceedsLimit(*Preheader);
}
+ if (!HasHighPressure)
+ break;
}
}
@@ -1055,13 +1102,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
}
std::vector<unsigned> &
-MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
+MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
+ bool UseCache) {
// Currently to save compiling time, MBB's register pressure will not change
// in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
// register pressure is changed after sinking any instructions into it.
// FIXME: need a accurate and cheap register pressure estiminate model here.
+
auto RP = CachedRegisterPressure.find(&MBB);
- if (RP != CachedRegisterPressure.end())
+ if (UseCache && RP != CachedRegisterPressure.end())
return RP->second;
RegionPressure Pressure;
@@ -1085,6 +1134,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
}
RPTracker.closeRegion();
+
+ if (RP != CachedRegisterPressure.end()) {
+ CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
+ return CachedRegisterPressure[&MBB];
+ }
+
auto It = CachedRegisterPressure.insert(
std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
return It.first->second;
@@ -1103,6 +1158,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
return false;
}
+// Recalculate RP and check if any pressure set exceeds the set limit.
+bool MachineSinking::registerPressureExceedsLimit(
+ const MachineBasicBlock &MBB) {
+ std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false);
+
+ for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
+ if (BBRegisterPressure[PS] >=
+ TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
MachineBasicBlock *MBB,
@@ -1581,83 +1651,98 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
return HasAliasedStore;
}
-/// Sink instructions into cycles if profitable. This especially tries to
-/// prevent register spills caused by register pressure if there is little to no
-/// overhead moving instructions into cycles.
-bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
- LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
+/// Aggressively sink instructions into cycles. This will aggressively try to
+/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
+/// In particular, it will sink into multiple successor blocks without limits
+/// based on the amount of sinking, or the type of ops being sunk (so long as
+/// they are safe to sink).
+bool MachineSinking::aggressivelySinkIntoCycle(
+ MachineCycle *Cycle, MachineInstr &I,
+ DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
+ // TODO: support instructions with multiple defs
+ if (I.getNumDefs() > 1)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
assert(Preheader && "Cycle sink needs a preheader block");
- MachineBasicBlock *SinkBlock = nullptr;
- bool CanSink = true;
- const MachineOperand &MO = I.getOperand(0);
-
- for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
- LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
- if (!Cycle->contains(MI.getParent())) {
- LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
- CanSink = false;
- break;
- }
+ SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;
- // FIXME: Come up with a proper cost model that estimates whether sinking
- // the instruction (and thus possibly executing it on every cycle
- // iteration) is more expensive than a register.
- // For now assumes that copies are cheap and thus almost always worth it.
- if (!MI.isCopy()) {
- LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
- CanSink = false;
- break;
+ MachineOperand &DefMO = I.getOperand(0);
+ for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) {
+ Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI});
+ }
+
+ for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
+ MachineInstr *MI = Entry.second;
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
+ if (MI->isPHI()) {
+ LLVM_DEBUG(
+ dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
+ continue;
}
- if (!SinkBlock) {
- SinkBlock = MI.getParent();
- LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
- << printMBBReference(*SinkBlock) << "\n");
+ // We cannot sink before the prologue
+ if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
+ "can't sink.\n");
continue;
}
- SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
- if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
- CanSink = false;
- break;
+ if (!Cycle->contains(MI->getParent())) {
+ LLVM_DEBUG(
+ dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
+ continue;
}
- LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
- << printMBBReference(*SinkBlock) << "\n");
- }
- if (!CanSink) {
- LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
- return false;
- }
- if (!SinkBlock) {
- LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
- return false;
- }
- if (SinkBlock == Preheader) {
- LLVM_DEBUG(
- dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
- return false;
- }
- if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
- LLVM_DEBUG(
- dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
- return false;
- }
+ MachineBasicBlock *SinkBlock = MI->getParent();
+ MachineInstr *NewMI = nullptr;
+ SinkItem MapEntry(&I, SinkBlock);
+
+ auto SI = SunkInstrs.find(MapEntry);
+
+ // Check for the case in which we have already sunk a copy of this
+ // instruction into the user block.
+ if (SI != SunkInstrs.end()) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
+ << printMBBReference(*SinkBlock) << "\n");
+ NewMI = SI->second;
+ }
- LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
- SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
- I);
+ // Create a copy of the instruction in the use block.
+ if (!NewMI) {
+ LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
+ << printMBBReference(*SinkBlock) << "\n");
+
+ NewMI = I.getMF()->CloneMachineInstr(&I);
+ if (DefMO.getReg().isVirtual()) {
+ const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg());
+ Register DestReg = MRI->createVirtualRegister(TRC);
+ NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(),
+ *TRI);
+ }
+ SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
+ NewMI);
+ SunkInstrs.insert({MapEntry, NewMI});
+ }
- // Conservatively clear any kill flags on uses of sunk instruction
- for (MachineOperand &MO : I.operands()) {
- if (MO.isReg() && MO.readsReg())
+ // Conservatively clear any kill flags on uses of sunk instruction
+ for (MachineOperand &MO : NewMI->all_uses()) {
+ assert(MO.isReg() && MO.isUse());
RegsToClearKillFlags.insert(MO.getReg());
- }
+ }
- // The instruction is moved from its basic block, so do not retain the
- // debug information.
- assert(!I.isDebugInstr() && "Should not sink debug inst");
- I.setDebugLoc(DebugLoc());
+ // The instruction is moved from its basic block, so do not retain the
+ // debug information.
+ assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
+ NewMI->setDebugLoc(DebugLoc());
+
+ // Replace the use with the newly created virtual register.
+ RegSubRegPair &UseReg = Entry.first;
+ MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(),
+ UseReg.SubReg, *TRI);
+ }
+ // If we have replaced all uses, then delete the dead instruction
+ if (I.isDead(*MRI))
+ I.eraseFromParent();
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir
new file mode 100644
index 00000000000000..bca1517ed183ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir
@@ -0,0 +1,1272 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 --sink-insts-to-avoid-spills=1 --stop-after=machine-sink -o - %s | FileCheck -check-prefixes=GFX9-SUNK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 --sink-insts-to-avoid-spills=1 -mattr=+wavefrontsize64 --stop-after=machine-sink -o - %s | FileCheck -check-prefixes=GFX10-SUNK %s
+
+---
+name: test_sink_copy
+alignment: 1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX9-SUNK-LABEL: name: test_sink_copy
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.4:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.5:
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.6.entry:
+ ;
+ ; GFX10-SUNK-LABEL: name: test_sink_copy
+ ; GFX10-SUNK: bb.0:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.1:
+ ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.2:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.3:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.4:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.5:
+ ; GFX10-SUNK-NEXT: S_ENDPGM 0
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.6.entry:
+ bb.0:
+ %0:vreg_256 = IMPLICIT_DEF
+ %1:vreg_256 = IMPLICIT_DEF
+ %2:vreg_256 = IMPLICIT_DEF
+ %3:vreg_256 = IMPLICIT_DEF
+ %4:vreg_256 = IMPLICIT_DEF
+ %5:vreg_256 = COPY %4
+ %6:vreg_256 = COPY %4
+ %7:vreg_256 = COPY %4
+ %8:vreg_256 = COPY %4
+ %9:vreg_256 = COPY %4
+
+
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_BRANCH %bb.4
+
+ bb.3:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+...
+
+# For gfx9, after sinking the copies, pressure is within the desired limit
+
+---
+name: test_sink_multi_stage
+alignment: 1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX9-SUNK-LABEL: name: test_sink_multi_stage
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.4:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.5:
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.6.entry:
+ ;
+ ; GFX10-SUNK-LABEL: name: test_sink_multi_stage
+ ; GFX10-SUNK: bb.0:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.1:
+ ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.2:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.3:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.4:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.5:
+ ; GFX10-SUNK-NEXT: S_ENDPGM 0
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.6.entry:
+ bb.0:
+ %0:vreg_256 = IMPLICIT_DEF
+ %1:vreg_256 = IMPLICIT_DEF
+ %2:vreg_256 = IMPLICIT_DEF
+ %3:vreg_256 = IMPLICIT_DEF
+ %4:vreg_256 = IMPLICIT_DEF
+ %5:vreg_256 = COPY %4
+ %6:vreg_256 = COPY %4
+ %7:vreg_256 = COPY %4
+ %8:vreg_256 = COPY %4
+ %9:vreg_256 = COPY %4
+ %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ INLINEASM &"", 1, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_BRANCH %bb.4
+
+ bb.3:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ INLINEASM &"", 1, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+...
+
+---
+name: test_sink_low_rp
+alignment: 1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX9-SUNK-LABEL: name: test_sink_low_rp
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.4:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.5:
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.6.entry:
+ ;
+ ; GFX10-SUNK-LABEL: name: test_sink_low_rp
+ ; GFX10-SUNK: bb.0:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.1:
+ ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.2:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.3:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.4:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.5:
+ ; GFX10-SUNK-NEXT: S_ENDPGM 0
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.6.entry:
+ bb.0:
+ %0:vreg_256 = IMPLICIT_DEF
+ %1:vreg_256 = IMPLICIT_DEF
+ %2:vreg_256 = IMPLICIT_DEF
+ %3:vreg_256 = IMPLICIT_DEF
+ %4:vreg_256 = IMPLICIT_DEF
+ %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25
+ S_BRANCH %bb.4
+
+ bb.3:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+...
+
+---
+name: test_sink_high_rp
+alignment: 1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX9-SUNK-LABEL: name: test_sink_high_rp
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.4:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.5:
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.6.entry:
+ ;
+ ; GFX10-SUNK-LABEL: name: test_sink_high_rp
+ ; GFX10-SUNK: bb.0:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.1:
+ ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.2:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.3:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.4:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.5:
+ ; GFX10-SUNK-NEXT: S_ENDPGM 0
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.6.entry:
+ bb.0:
+ %0:vreg_256 = IMPLICIT_DEF
+ %1:vreg_256 = IMPLICIT_DEF
+ %2:vreg_256 = IMPLICIT_DEF
+ %3:vreg_256 = IMPLICIT_DEF
+ %4:vreg_256 = IMPLICIT_DEF
+ %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+
+ S_BRANCH %bb.1
+
+ bb.1:
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+ S_BRANCH %bb.4
+
+ bb.3:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+...
+
+# Do not sink convergent op (MFMA)
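+# Convergent operations must not gain new control dependences, so the MFMA is
+# expected to stay in the preheader (bb.0) in the checks below rather than being
+# duplicated into the conditionally executed bb.2/bb.3.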
+
+---
+name: test_sink_convergent
+alignment: 1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX9-SUNK-LABEL: name: test_sink_convergent
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF6:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF5]], [[DEF7]], [[DEF6]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF8]], implicit [[V_MFMA_F32_4X4X1F32_e64_]]
+ ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.4:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.5:
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.6.entry:
+ ;
+ ; GFX10-SUNK-LABEL: name: test_sink_convergent
+ ; GFX10-SUNK: bb.0:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF6:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF5]], [[DEF7]], [[DEF6]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.1:
+ ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF8]], implicit [[V_MFMA_F32_4X4X1F32_e64_]]
+ ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.2:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.3:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.4:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.5:
+ ; GFX10-SUNK-NEXT: S_ENDPGM 0
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.6.entry:
+ bb.0:
+ %0:vreg_256 = IMPLICIT_DEF
+ %1:vreg_256 = IMPLICIT_DEF
+ %2:vreg_256 = IMPLICIT_DEF
+ %3:vreg_256 = IMPLICIT_DEF
+ %4:vreg_256 = IMPLICIT_DEF
+ %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:areg_128_align2 = IMPLICIT_DEF
+ %42:vgpr_32 = IMPLICIT_DEF
+ %43:vgpr_32 = IMPLICIT_DEF
+ %44:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %40, %42, %41, 0, 0, 0, implicit $mode, implicit $exec
+
+ S_BRANCH %bb.1
+
+ bb.1:
+ INLINEASM &"", 1, implicit %43, implicit %44
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+ S_BRANCH %bb.4
+
+ bb.3:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+...
+
+# Do not sink instructions with multiple defs
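+# V_ADD_CO_U32 defines both a result and a carry-out; the aggressive sinking skips
+# such multi-def instructions, so the checks below keep it in bb.0.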
+
+---
+name: test_sink_multi_def
+alignment: 1
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX9-SUNK-LABEL: name: test_sink_multi_def
+ ; GFX9-SUNK: bb.0:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.1:
+ ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]]
+ ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.2:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.3:
+ ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.4:
+ ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX9-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.5:
+ ; GFX9-SUNK-NEXT: S_ENDPGM 0
+ ; GFX9-SUNK-NEXT: {{ $}}
+ ; GFX9-SUNK-NEXT: bb.6.entry:
+ ;
+ ; GFX10-SUNK-LABEL: name: test_sink_multi_def
+ ; GFX10-SUNK: bb.0:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.1
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.1:
+ ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]]
+ ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.3
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.2:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.3:
+ ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]]
+ ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]]
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.4
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.4:
+ ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000)
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ ; GFX10-SUNK-NEXT: S_BRANCH %bb.5
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.5:
+ ; GFX10-SUNK-NEXT: S_ENDPGM 0
+ ; GFX10-SUNK-NEXT: {{ $}}
+ ; GFX10-SUNK-NEXT: bb.6.entry:
+ bb.0:
+ %0:vreg_256 = IMPLICIT_DEF
+ %1:vreg_256 = IMPLICIT_DEF
+ %2:vreg_256 = IMPLICIT_DEF
+ %3:vreg_256 = IMPLICIT_DEF
+ %4:vreg_256 = IMPLICIT_DEF
+ %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec
+ %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec
+ %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec
+ %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec
+ %32:vgpr_32, %33:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub2:vreg_256, %1.sub4:vreg_256, 0, implicit $exec
+
+ S_BRANCH %bb.1
+
+ bb.1:
+ INLINEASM &"", 1, implicit %32, implicit %33
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+ S_BRANCH %bb.4
+
+ bb.3:
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4
+ INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31
+ S_BRANCH %bb.4
+
+ bb.4:
+ S_CBRANCH_SCC1 %bb.1, implicit undef $scc
+ S_BRANCH %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
index efa21052e3ae2f..0fc31ea9d64379 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir
@@ -1,5 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink --sink-insts-to-avoid-spills=1 -o - %s | FileCheck -check-prefixes=GFX9 %s
+
---
name: test_sink_fmac_to_only_use
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
index 04c80582f6f079..ef6771278b06f3 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir
@@ -1,78 +1,79 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX10 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink --sink-insts-to-avoid-spills=1 -o - %s | FileCheck -check-prefixes=GFX10 %s
---
name: multi_else_break
tracksRegLiveness: true
body: |
- ; CHECK-LABEL: name: multi_else_break
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $vgpr4, $vgpr5
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %9, %bb.6
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, %11, %bb.6
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]], %bb.1, %13, %bb.5
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, %15, %bb.5
- ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.1, %17, %bb.5
- ; CHECK-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %19, %bb.5
- ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PHI5]], [[COPY1]], implicit $exec
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
- ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], $exec_lo, implicit-def $scc
- ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], $exec_lo, implicit-def $scc
- ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.4
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
- ; CHECK-NEXT: SI_END_CF %9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: S_ENDPGM 0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI5]], [[S_MOV_B32_1]], 0, implicit $exec
- ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY]], [[V_ADD_U32_e64_]], implicit $exec
- ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]]
- ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_1]], $exec_lo, implicit-def $scc
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], $exec_lo, implicit-def $scc
- ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_]], implicit-def $scc
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.2(0x7c000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_2]], %bb.4
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[COPY4]], %bb.4
- ; CHECK-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.2, [[V_ADD_U32_e64_]], %bb.4
- ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI6]], [[PHI4]], implicit-def dead $scc
- ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.6
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.6:
- ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI9:%[0-9]+]]:vgpr_32 = PHI [[PHI8]], %bb.5
- ; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: [[SI_IF_BREAK1:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI7]], [[PHI]], implicit-def dead $scc
- ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: S_BRANCH %bb.3
+ ; GFX10-LABEL: name: multi_else_break
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.2(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %9, %bb.6
+ ; GFX10-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, %11, %bb.6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]], %bb.1, %13, %bb.5
+ ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, %15, %bb.5
+ ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.1, %17, %bb.5
+ ; GFX10-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %19, %bb.5
+ ; GFX10-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PHI5]], [[COPY1]], implicit $exec
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
+ ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: SI_END_CF %9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.4:
+ ; GFX10-NEXT: successors: %bb.5(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI5]], [[S_MOV_B32_1]], 0, implicit $exec
+ ; GFX10-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY]], [[V_ADD_U32_e64_]], implicit $exec
+ ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]]
+ ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_1]], $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], $exec_lo, implicit-def $scc
+ ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_]], implicit-def $scc
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.5:
+ ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.2(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_2]], %bb.4
+ ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[COPY4]], %bb.4
+ ; GFX10-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.2, [[V_ADD_U32_e64_]], %bb.4
+ ; GFX10-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX10-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI6]], [[PHI4]], implicit-def dead $scc
+ ; GFX10-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.6:
+ ; GFX10-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[PHI9:%[0-9]+]]:vgpr_32 = PHI [[PHI8]], %bb.5
+ ; GFX10-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX10-NEXT: [[SI_IF_BREAK1:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI7]], [[PHI]], implicit-def dead $scc
+ ; GFX10-NEXT: SI_LOOP [[SI_IF_BREAK1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.3
bb.0:
successors: %bb.1(0x80000000)
liveins: $vgpr4, $vgpr5
diff --git a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir
index 43c286a830b42e..f23afe52f97de8 100644
--- a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir
+++ b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir
@@ -25,23 +25,24 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[LARL:%[0-9]+]]:addr64bit = LARL @b
+ ; CHECK-NEXT: [[LA:%[0-9]+]]:gr64bit = LA killed [[LARL]], 49, $noreg
+ ; CHECK-NEXT: [[LGHI:%[0-9]+]]:gr64bit = LGHI 7
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64bit = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64bit = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[LA:%[0-9]+]]:gr64bit = LA [[LARL]], 49, $noreg
- ; CHECK-NEXT: [[LGHI:%[0-9]+]]:gr64bit = LGHI 7
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64bit = IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64bit = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64bit = COPY [[LA]]
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0
; CHECK-NEXT: $r2d = COPY [[DEF]]
- ; CHECK-NEXT: $r3d = COPY [[LA]]
+ ; CHECK-NEXT: $r3d = COPY [[COPY]]
; CHECK-NEXT: $r4d = COPY [[LGHI]]
; CHECK-NEXT: CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
; CHECK-NEXT: ADJCALLSTACKUP 0, 0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0
; CHECK-NEXT: $r2d = COPY [[DEF1]]
- ; CHECK-NEXT: $r3d = COPY [[LA]]
+ ; CHECK-NEXT: $r3d = COPY [[COPY]]
; CHECK-NEXT: $r4d = COPY [[LGHI]]
; CHECK-NEXT: CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
; CHECK-NEXT: ADJCALLSTACKUP 0, 0
@@ -54,19 +55,20 @@ body: |
%2:gr64bit = LGHI 7
%3:gr64bit = IMPLICIT_DEF
%5:gr64bit = IMPLICIT_DEF
+ %6:gr64bit = COPY killed %0
bb.1:
successors: %bb.1(0x80000000)
ADJCALLSTACKDOWN 0, 0
$r2d = COPY %3
- $r3d = COPY %0
+ $r3d = COPY %6
$r4d = COPY %2
CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
ADJCALLSTACKUP 0, 0
ADJCALLSTACKDOWN 0, 0
$r2d = COPY %5
- $r3d = COPY %0
+ $r3d = COPY %6
$r4d = COPY %2
CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d
ADJCALLSTACKUP 0, 0