[llvm] [AMDGPU] Reimplement PreRARematStage stage for the GCNSchedStrategy. Early-preview, WIP. (PR #72505)
Valery Pykhtin via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 16 03:52:10 PST 2023
https://github.com/vpykhtin created https://github.com/llvm/llvm-project/pull/72505
None
>From 3e6e768a05d51f283799db78d6a9c3c8374d1e62 Mon Sep 17 00:00:00 2001
From: Valery Pykhtin <valery.pykhtin at gmail.com>
Date: Wed, 15 Nov 2023 14:28:32 +0100
Subject: [PATCH] [AMDGPU] Reimplement PreRARematStage stage for the
GCNSchedStrategy.
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 36 +
llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 +
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 71 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 36 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 255 ++++-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 20 +
llvm/lib/Target/AMDGPU/GCNSinkTRInstr.cpp | 656 ++++++++++++
llvm/lib/Target/AMDGPU/GCNSinkTRInstr.h | 120 +++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 +
...ne-scheduler-sink-trivial-remats-debug.mir | 1 +
.../machine-scheduler-sink-trivial-remats.mir | 952 +++++++++---------
llvm/test/test.mir | 66 ++
12 files changed, 1706 insertions(+), 510 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/GCNSinkTRInstr.cpp
create mode 100644 llvm/lib/Target/AMDGPU/GCNSinkTRInstr.h
create mode 100644 llvm/test/test.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 4326f3c3fbe1ae7..1dd21088115881b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -648,6 +648,42 @@ bool GCNSubtarget::useVGPRIndexMode() const {
bool GCNSubtarget::useAA() const { return UseAA; }
+// Temporary helper mirroring the boundaries used by getOccupancyWithNumSGPRs:
+// returns the largest SGPR count for which that function still reports at
+// least the requested Occupancy.
+unsigned GCNSubtarget::getMaxNumSGPRs(unsigned Occupancy) const {
+  // The upper bound is subtarget dependent. getOccupancyWithNumSGPRs returns
+  // getMaxWavesPerEU() on GFX10+, which is not limited to 10, so a hard-coded
+  // limit would reject valid occupancies there.
+  assert(Occupancy >= 1 && Occupancy <= getMaxWavesPerEU());
+  const unsigned MaxNumSGPRs = getTotalNumSGPRs();
+
+  // SGPR usage does not limit occupancy on GFX10 and later.
+  if (getGeneration() >= AMDGPUSubtarget::GFX10)
+    return MaxNumSGPRs;
+
+  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    switch (Occupancy) {
+    case 10:
+      return 80;
+    case 9:
+      return 88;
+    case 8:
+      return 100;
+    default:
+      return MaxNumSGPRs;
+    }
+  }
+
+  // Generations before VOLCANIC_ISLANDS.
+  switch (Occupancy) {
+  case 10:
+    return 48;
+  case 9:
+    return 56;
+  case 8:
+    return 64;
+  case 7:
+    return 72;
+  case 6:
+    return 80;
+  default:
+    return MaxNumSGPRs;
+  }
+}
+
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return getMaxWavesPerEU();
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 0c0720890794b66..fa90160d2c625a0 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -112,6 +112,7 @@ add_llvm_target(AMDGPUCodeGen
GCNRegPressure.cpp
GCNRewritePartialRegUses.cpp
GCNSchedStrategy.cpp
+ GCNSinkTRInstr.cpp
GCNVOPDUtils.cpp
R600AsmPrinter.cpp
R600ClauseMergePass.cpp
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 6a0bc163646e1e4..20a5183c1c8823f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -47,6 +47,39 @@ unsigned GCNRegPressure::getRegKind(Register Reg,
: (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
}
+// Compare this pressure with RHS for the SGPR32, VGPR32 and AGPR32 kinds and
+// return a mask with bit K set when Value[K] is strictly greater than
+// RHS.Value[K].
+GCNRegPressure::ExcessMask
+GCNRegPressure::exceed(const GCNRegPressure &RHS) const {
+  ExcessMask ResMask = 0;
+  for (unsigned Kind : {SGPR32, VGPR32, AGPR32})
+    if (Value[Kind] > RHS.Value[Kind])
+      ResMask |= 1ul << Kind;
+  return ResMask;
+}
+
+// Return the single-bit ExcessMask matching the register class of the virtual
+// register Reg: the SGPR32, VGPR32 or AGPR32 bit, or 0 when the class is none
+// of the three.
+GCNRegPressure::ExcessMask
+GCNRegPressure::getRegExcessMask(Register Reg, const MachineRegisterInfo &MRI) {
+  assert(Reg.isVirtual());
+  const auto *SRI =
+      static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+  const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+  if (SRI->isSGPRClass(RC))
+    return 1ul << SGPR32;
+  if (SRI->isVGPRClass(RC))
+    return 1ul << VGPR32;
+  if (SRI->isAGPRClass(RC))
+    return 1ul << AGPR32;
+  return 0;
+}
+
+// Build a pressure value holding the register limits for Occupancy: the SGPR32
+// entry comes from ST.getMaxNumSGPRs, and both the VGPR32 and AGPR32 entries
+// come from ST.getMaxNumVGPRs.
+GCNRegPressure GCNRegPressure::getMaxPressure(unsigned Occupancy,
+                                              const GCNSubtarget &ST) {
+  GCNRegPressure Limit;
+  Limit.Value[SGPR32] = ST.getMaxNumSGPRs(Occupancy);
+  const unsigned MaxVGPRs = ST.getMaxNumVGPRs(Occupancy);
+  Limit.Value[VGPR32] = MaxVGPRs;
+  Limit.Value[AGPR32] = MaxVGPRs;
+  return Limit;
+}
+
void GCNRegPressure::inc(unsigned Reg,
LaneBitmask PrevMask,
LaneBitmask NewMask,
@@ -239,6 +272,25 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
return LiveRegs;
}
+// If Reg is present in LiveRegs, subtract its lanes from CurPressure, drop it
+// from LiveRegs and return the lane mask it had. Otherwise return an empty
+// mask and leave the tracker untouched.
+LaneBitmask GCNRPTracker::decIfAlive(Register Reg) {
+  auto It = LiveRegs.find(Reg);
+  if (It != LiveRegs.end()) {
+    const LaneBitmask LiveMask = It->second;
+    CurPressure.dec(Reg, LiveMask, *MRI);
+    LiveRegs.erase(It);
+    return LiveMask;
+  }
+  return LaneBitmask::getNone();
+}
+
+// Subtract the lanes of Reg from CurPressure, drop it from LiveRegs and return
+// the lane mask it had. Reg must currently be in LiveRegs (asserted).
+LaneBitmask GCNRPTracker::dec(Register Reg) {
+  auto It = LiveRegs.find(Reg);
+  assert(It != LiveRegs.end());
+  const LaneBitmask LiveMask = It->second;
+  LiveRegs.erase(It);
+  CurPressure.dec(Reg, LiveMask, *MRI);
+  return LiveMask;
+}
+
void GCNRPTracker::reset(const MachineInstr &MI,
const LiveRegSet *LiveRegsCopy,
bool After) {
@@ -339,7 +391,7 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
bool GCNDownwardRPTracker::advanceBeforeNext() {
assert(MRI && "call reset first");
if (!LastTrackedMI)
- return NextMI == MBBEnd;
+ return NextMI != MBBEnd;
assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
@@ -387,7 +439,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
LastTrackedMI = nullptr;
- return NextMI == MBBEnd;
+ return NextMI != MBBEnd;
}
void GCNDownwardRPTracker::advanceToNext() {
@@ -491,6 +543,21 @@ Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
});
}
+// Build a Printable that writes a compact form of the virtual register Reg: a
+// kind letter ('S', 'A' or 'V'), then the register size in bits unless it is
+// 32, then the printReg text, e.g. S64%2 or V%1.
+Printable llvm::printGCNRegShort(Register Reg, const MachineRegisterInfo &MRI) {
+  return Printable([&, Reg](raw_ostream &OS) {
+    assert(Reg.isVirtual());
+    const auto RC = MRI.getRegClass(Reg);
+    auto SRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
+    char KindLetter = 'V';
+    if (SRI->isSGPRClass(RC))
+      KindLetter = 'S';
+    else if (SRI->isAGPRClass(RC))
+      KindLetter = 'A';
+    OS << KindLetter;
+    if (unsigned BitSize = SRI->getRegSizeInBits(*RC); BitSize != 32)
+      OS << BitSize;
+    OS << printReg(Reg, nullptr, 0, &MRI);
+  });
+}
+
void GCNRegPressure::dump() const { dbgs() << print(*this); }
static cl::opt<bool> UseDownwardTracker(
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e21bf10d795ba52..cb856f5a22ed065 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -70,6 +70,14 @@ struct GCNRegPressure {
LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
+  // Remove the lanes in Mask of Reg from this pressure; implemented as an
+  // inc() transition from Mask to the empty lane mask.
+  void dec(unsigned Reg, LaneBitmask Mask, const MachineRegisterInfo &MRI) {
+    const LaneBitmask NoLanes = LaneBitmask::getNone();
+    inc(Reg, /*PrevMask=*/Mask, /*NewMask=*/NoLanes, MRI);
+  }
+
+  // Remove Reg from this pressure using its maximal lane mask as reported by
+  // MRI.getMaxLaneMaskForVReg.
+  void dec(unsigned Reg, const MachineRegisterInfo &MRI) {
+    const LaneBitmask AllLanes = MRI.getMaxLaneMaskForVReg(Reg);
+    dec(Reg, AllLanes, MRI);
+  }
+
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure& O) const {
return getOccupancy(ST) > O.getOccupancy(ST);
}
@@ -85,13 +93,25 @@ struct GCNRegPressure {
return !(*this == O);
}
+ typedef unsigned ExcessMask;
+ // Return mask denoting kinds of registers where this RP is higher than RHS.
+ ExcessMask exceed(const GCNRegPressure &RHS) const;
+
+ static unsigned getRegKind(Register Reg, const MachineRegisterInfo &MRI);
+
+ // Return mask to test if the Reg is of the kind where excess happened.
+ static ExcessMask getRegExcessMask(Register Reg,
+ const MachineRegisterInfo &MRI);
+
+ // Return pressure with the maximum number of registers for a given Occupancy.
+ static GCNRegPressure getMaxPressure(unsigned Occupancy,
+ const GCNSubtarget &ST);
+
void dump() const;
private:
unsigned Value[TOTAL_KINDS];
- static unsigned getRegKind(Register Reg, const MachineRegisterInfo &MRI);
-
friend GCNRegPressure max(const GCNRegPressure &P1,
const GCNRegPressure &P2);
@@ -128,6 +148,16 @@ class GCNRPTracker {
void clearMaxPressure() { MaxPressure.clear(); }
+ LaneBitmask getLiveMask(Register Reg) const { return LiveRegs.lookup(Reg); } // Lane mask recorded for Reg in LiveRegs; lookup() yields a default-constructed mask when Reg is absent.
+
+ // If Reg is live, subtract it from the current pressure and erase it
+ // from the live regs. Return the register's lane mask if it was live
+ // and an empty lane mask otherwise.
+ LaneBitmask decIfAlive(Register Reg);
+
+ // Same as decIfAlive, but Reg has to be live.
+ LaneBitmask dec(Register Reg);
+
GCNRegPressure getPressure() const { return CurPressure; }
decltype(LiveRegs) moveLiveRegs() {
@@ -305,6 +335,8 @@ Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST = nullptr);
Printable print(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineRegisterInfo &MRI);
+Printable printGCNRegShort(Register Reg, const MachineRegisterInfo &MRI);
+
Printable reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
const GCNRPTracker::LiveRegSet &TrackedL,
const TargetRegisterInfo *TRI, StringRef Pfx = " ");
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index c3d60b635d3240a..c2cdf7c319acdd0 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -25,11 +25,14 @@
#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
+#include "GCNSinkTRInstr.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#define DEBUG_TYPE "machine-scheduler"
+#define EXPENSIVE_CHECKS
+
using namespace llvm;
static cl::opt<bool> DisableUnclusterHighRP(
@@ -741,17 +744,178 @@ bool ClusteredLowOccStage::initGCNSchedStage() {
return true;
}
+// For every basic block that owns at least one scheduling region, combine the
+// DAG.Pressure entries of its regions with max() and return the per-block
+// result.
+DenseMap<MachineBasicBlock *, GCNRegPressure>
+PreRARematStage::getMBBPressure() const {
+  DenseMap<MachineBasicBlock *, GCNRegPressure> MBBPressure;
+  for (unsigned RgnIdx = 0, NumRgns = DAG.Regions.size(); RgnIdx != NumRgns;
+       ++RgnIdx) {
+    MachineBasicBlock *MBB = DAG.Regions[RgnIdx].first->getParent();
+    // operator[] creates the entry the first time a block is seen.
+    GCNRegPressure &BlockRP = MBBPressure[MBB];
+    BlockRP = max(BlockRP, DAG.Pressure[RgnIdx]);
+  }
+  return MBBPressure;
+}
+
+// Orders (SlotIndex, region index) pairs by their SlotIndex only; the region
+// index is ignored.
+static auto LessIndexFirst =
+    [](const std::pair<SlotIndex, unsigned> &LHS,
+       const std::pair<SlotIndex, unsigned> &RHS) -> bool {
+  return LHS.first < RHS.first;
+};
+
+// Advances the begin iterator of each region that starts with a def about to be sunk. This should be called before the instructions are erased from the old place.
+void PreRARematStage::updateSourceRegionBoundaries(
+ const DenseSet<Register> &SinkRegs, const SinkTrivallyRematInstr &STR) {
+ auto &SII = *DAG.LIS->getSlotIndexes();
+ SmallVector<SlotIndex> DefSI;
+ for (Register Reg : SinkRegs)
+ DefSI.push_back(SII.getInstructionIndex(*STR.getDefInstr(Reg)));
+ sort(DefSI); // Process the defining instructions in slot-index order.
+ for (auto DefI = DefSI.begin(), E = DefSI.end(); DefI != E;) {
+ auto R = std::upper_bound(RgnEnd.begin(), RgnEnd.end(),
+ std::make_pair(*DefI, 0u), LessIndexFirst); // First RgnEnd entry whose slot is greater than the def's slot.
+ if (R == RgnEnd.end() ||
+ &*DAG.Regions[R->second].first != SII.getInstructionFromIndex(*DefI)) {
+ ++DefI; // The def is not the first instruction of that region: no boundary to move.
+ continue;
+ }
+ auto &Rgn = DAG.Regions[R->second];
+ do { // Step the region begin past every consecutive def that is being sunk.
+ ++Rgn.first;
+ ++DefI;
+ if (Rgn.first == Rgn.second || DefI == E)
+ break; // Region became empty or no defs are left.
+ } while (&*Rgn.first == SII.getInstructionFromIndex(*DefI));
+ }
+}
+
+// Moves the begin iterator of each region that starts with a user instruction back over the defs placed in front of it. This should be called after the instructions are inserted in the new place.
+void PreRARematStage::updateTargetRegionBoundaries(
+ const DenseSet<Register> &SinkRegs, const SinkTrivallyRematInstr &STR) {
+ auto &SII = *DAG.LIS->getSlotIndexes();
+ DenseSet<MachineInstr *> Visited;
+ for (Register Reg : SinkRegs) {
+ auto *UserMI = STR.getUserInstr(Reg);
+ if (!Visited.insert(UserMI).second)
+ continue; // One instruction can use more than one sunk reg.
+
+ auto UserSI = SII.getInstructionIndex(*UserMI);
+ auto R = std::upper_bound(RgnEnd.begin(), RgnEnd.end(),
+ std::make_pair(UserSI, 0u), LessIndexFirst); // First RgnEnd entry whose slot is greater than the user's slot.
+ if (R == RgnEnd.end() || &*DAG.Regions[R->second].first != UserMI)
+ continue; // Only regions that begin exactly at the user need their begin moved.
+
+ unsigned NumInstrInserted = count_if(UserMI->uses(), [&](auto &Op) { // NOTE(review): counts operands, so a sunk reg read twice by UserMI would be counted twice - confirm that cannot happen.
+ return Op.isReg() && SinkRegs.contains(Op.getReg());
+ });
+#ifndef NDEBUG
+ auto DefinesSinkableReg = [&](MachineInstr &DefMI, MachineInstr &UserMI) { // Assert-only predicate: DefMI's operand 0 defines a sunk reg that UserMI reads.
+ auto &Op0 = DefMI.getOperand(0);
+ return Op0.isReg() && Op0.isDef() && SinkRegs.contains(Op0.getReg()) &&
+ UserMI.readsRegister(Op0.getReg());
+ };
+#endif
+ auto &F = DAG.Regions[R->second].first; // Reference: decrementing F updates the stored region begin.
+ auto B = UserMI->getParent()->begin();
+ while (F != B && NumInstrInserted > 0) {
+ --F, --NumInstrInserted;
+ assert(DefinesSinkableReg(*F, *UserMI));
+ }
+ assert(NumInstrInserted == 0);
+ }
+}
+
+// Scan Regions from the highest index downwards and return the index of the
+// first region found whose parent block is MBB, or unsigned(-1) when MBB has
+// no region.
+unsigned
+GCNScheduleDAGMILive::findFirstRegionInMBB(MachineBasicBlock *MBB) const {
+  for (int Idx = static_cast<int>(Regions.size()) - 1; Idx >= 0; --Idx)
+    if (getRegionMBB(Idx) == MBB)
+      return Idx;
+  return static_cast<unsigned>(-1);
+}
+
+// Return the index of the scheduling region that contains MI, or unsigned(-1)
+// when MI lies outside every region recorded in RgnEnd.
+unsigned PreRARematStage::findContainingRegion(MachineInstr &MI) const {
+  const unsigned NotFound = ~0u;
+  auto &SII = *DAG.LIS->getSlotIndexes();
+  const SlotIndex MISlot = SII.getInstructionIndex(MI);
+  // First region whose recorded end slot is greater than MI's slot.
+  auto RgnIt = std::upper_bound(RgnEnd.begin(), RgnEnd.end(),
+                                std::make_pair(MISlot, 0u), LessIndexFirst);
+  if (RgnIt == RgnEnd.end())
+    return NotFound;
+  const unsigned Idx = RgnIt->second;
+  auto &Rgn = DAG.Regions[Idx];
+  // MI belongs to the region only if it is not located before the region's
+  // first non-debug instruction.
+  auto FirstMI = skipDebugInstructionsForward(Rgn.first, Rgn.second);
+  if (MISlot >= SII.getInstructionIndex(*FirstMI))
+    return Idx;
+  return NotFound;
+}
+
+#ifndef NDEBUG
+LLVM_DUMP_METHOD
+bool GCNScheduleDAGMILive::isValid() const { // Debug-only consistency check of the cached per-region data; prints every mismatch to dbgs() and returns false if any was found.
+ unsigned CalcMinOcc = std::numeric_limits<unsigned>::max();
+ bool Result = true;
+ for (unsigned RI = 0, RE = Regions.size(); RI != RE; ++RI) {
+ auto &Rgn = Regions[RI];
+#ifdef EXPENSIVE_CHECKS
+ GCNDownwardRPTracker RPT(*LIS); // Recompute live-ins and max pressure from LIS and compare with the cached values.
+ RPT.reset(*Rgn.first);
+ auto &LISLR = RPT.getLiveRegs();
+ auto &DAGLR = LiveIns[RI];
+ if (!isEqual(LISLR, DAGLR)) {
+ dbgs() << "Liveins mismatch at bb." << Rgn.first->getParent()->getNumber()
+ << ", region:\n " << *Rgn.first << " " << *std::prev(Rgn.second)
+ << reportMismatch(LISLR, DAGLR, TRI);
+ Result = false;
+ }
+ RPT.advance(Rgn.second);
+ auto LISRP = RPT.moveMaxPressure();
+ auto &DAGRP = Pressure[RI];
+ if (DAGRP != LISRP) {
+ dbgs() << "RP mismatch at bb." << Rgn.first->getParent()->getNumber()
+ << ", region:\n " << *Rgn.first << " " << *std::prev(Rgn.second)
+ << "DAG RP: " << print(DAGRP, &ST)
+ << "LIS RP: " << print(LISRP, &ST);
+ Result = false;
+ }
+#endif
+ unsigned RgnOcc = Pressure[RI].getOccupancy(ST);
+ if (RgnOcc < MinOccupancy) { // No region may have a lower occupancy than the recorded minimum.
+ dbgs() << "Occupancy " << RgnOcc << " is less than minimal "
+ << MinOccupancy << ", region:\n " << *Rgn.first << " "
+ << *std::prev(Rgn.second);
+ Result = false;
+ }
+ bool IsMinOcc = RgnOcc == MinOccupancy;
+ if (RegionsWithMinOcc[RI] != IsMinOcc) { // NOTE(review): the message prints the expected value (IsMinOcc), not the stored flag.
+ dbgs() << "Incorrect RegionsWithMinOcc = " << IsMinOcc << ", region:\n "
+ << *Rgn.first << " " << *std::prev(Rgn.second);
+ Result = false;
+ }
+ CalcMinOcc = std::min(CalcMinOcc, RgnOcc);
+ }
+ if (CalcMinOcc != MinOccupancy) { // The recorded minimum must equal the minimum over all regions.
+ dbgs() << "Incorrect MinOccupancy = " << MinOccupancy << ", should be "
+ << CalcMinOcc << '\n';
+ Result = false;
+ }
+ return Result;
+}
+#endif
+
bool PreRARematStage::initGCNSchedStage() {
if (!GCNSchedStage::initGCNSchedStage())
return false;
- if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
+ if (DAG.MinOccupancy == DAG.MFI.getMaxWavesPerEU())
+ return false; // Nothing to improve.
+
+ if (DAG.Regions.size() == 1)
return false;
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
// Check maximum occupancy
- if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
- DAG.MinOccupancy)
+ unsigned MaxOcc = ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize());
+ if (MaxOcc == DAG.MinOccupancy)
return false;
// FIXME: This pass will invalidate cached MBBLiveIns for regions
@@ -760,10 +924,89 @@ bool PreRARematStage::initGCNSchedStage() {
// need to be fixed if there is another pass after this pass.
assert(!S.hasNextStage());
- collectRematerializableInstructions();
- if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
+ auto MBBPressure = getMBBPressure();
+ SinkTrivallyRematInstr STR(DAG.MF, DAG.LIS, MBBPressure);
+
+ DenseSet<Register> Regs;
+ unsigned NewOcc = STR.collectSinkableRegs(Regs, DAG.MinOccupancy, MaxOcc);
+ if (NewOcc <= DAG.MinOccupancy)
return false;
+ auto &SII = *DAG.LIS->getSlotIndexes();
+ for (unsigned RI = 0, RE = DAG.Regions.size(); RI != RE; ++RI) {
+ auto &Rgn = DAG.Regions[RI];
+ auto L = skipDebugInstructionsBackward(std::prev(Rgn.second), Rgn.first);
+ RgnEnd.emplace_back(SII.getInstructionIndex(*L).getDeadSlot(), RI);
+ }
+ sort(RgnEnd, LessIndexFirst);
+
+ // Update live-ins, update pressure for regions where regs are live-through.
+ auto UpdateRgnLiveInsAndPressure = [&](Register Reg, unsigned RgnIdx) {
+ auto &LiveIns = DAG.LiveIns[RgnIdx];
+ auto I = LiveIns.find(Reg);
+ if (I != LiveIns.end()) {
+ // Pressure in regions requiring pressure calculation will be overwritten
+ // later so don't bother checking because dec is cheap.
+ DAG.Pressure[RgnIdx].dec(Reg, I->second, DAG.MRI);
+ LiveIns.erase(I);
+ }
+ };
+ DenseSet<unsigned> RecalcPressureRgnIdx;
+ for (Register Reg : Regs) {
+ MachineInstr *DefMI = STR.getDefInstr(Reg);
+ unsigned SrcRgnIdx = findContainingRegion(*DefMI);
+ if ((int)SrcRgnIdx != -1)
+ RecalcPressureRgnIdx.insert(SrcRgnIdx);
+
+ // Update regions after source region if any.
+ // Note: regions are stored from bottom to top.
+ MachineBasicBlock *SrcMBB = DefMI->getParent();
+ for (unsigned Idx = (int)SrcRgnIdx == -1 ? SrcRgnIdx - 1
+ : DAG.findFirstRegionInMBB(SrcMBB);
+ (int)Idx >= 0 && SrcMBB == DAG.getRegionMBB(Idx); --Idx) {
+ UpdateRgnLiveInsAndPressure(Reg, Idx);
+ }
+
+ MachineInstr *UserMI = STR.getUserInstr(Reg);
+ unsigned TgtRgnIdx = findContainingRegion(*UserMI);
+ if ((int)TgtRgnIdx != -1)
+ RecalcPressureRgnIdx.insert(TgtRgnIdx);
+ MachineBasicBlock *TgtMBB =
+ (int)TgtRgnIdx != -1 ? UserMI->getParent() : nullptr;
+ STR.forEveryMBBRegIsLiveIn(Reg, [&](MachineBasicBlock *MBB) {
+ int LastIdx = MBB != TgtMBB ? 0 : TgtRgnIdx;
+ for (unsigned Idx = DAG.findFirstRegionInMBB(MBB);
+ (int)Idx >= LastIdx && MBB == DAG.getRegionMBB(Idx); --Idx) {
+ UpdateRgnLiveInsAndPressure(Reg, Idx);
+ }
+ });
+ }
+
+ updateSourceRegionBoundaries(Regs, STR);
+
+ STR.sinkTriviallyRematInstrs(Regs);
+
+ updateTargetRegionBoundaries(Regs, STR);
+
+ // Update pressure. TODO: pressure could be cached in STR.collectSinkableRegs.
+ for (unsigned RI : RecalcPressureRgnIdx) {
+ auto &Rgn = DAG.Regions[RI];
+ GCNDownwardRPTracker RPT(*DAG.LIS);
+ RPT.advance(Rgn.first, Rgn.second, &DAG.LiveIns[RI]);
+ DAG.Pressure[RI] = RPT.moveMaxPressure();
+ }
+
+ // Update RegionsWithMinOcc flags.
+ for (unsigned RI = 0, RE = DAG.Regions.size(); RI != RE; ++RI) {
+ DAG.RegionsWithMinOcc[RI] = DAG.Pressure[RI].getOccupancy(ST) == NewOcc;
+ }
+
+ DAG.MinOccupancy = NewOcc;
+ SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ MFI.increaseOccupancy(MF, NewOcc);
+
+ assert(DAG.isValid());
+
LLVM_DEBUG(
dbgs() << "Retrying function scheduling with improved occupancy of "
<< DAG.MinOccupancy << " from rematerializing\n");
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 7862ec1e894b62e..b29e3817ff03567 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -23,6 +23,7 @@ class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;
+class SinkTrivallyRematInstr;
enum class GCNSchedStageID : unsigned {
OccInitialSchedule = 0,
@@ -232,6 +233,12 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
+  // Return the basic block that holds the first instruction of region Idx.
+  MachineBasicBlock *getRegionMBB(unsigned Idx) const {
+    const auto &Rgn = Regions[Idx];
+    return Rgn.first->getParent();
+  }
+
+ unsigned findFirstRegionInMBB(MachineBasicBlock *MBB) const;
+
public:
GCNScheduleDAGMILive(MachineSchedContext *C,
std::unique_ptr<MachineSchedStrategy> S);
@@ -239,6 +246,8 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
void schedule() override;
void finalizeSchedule() override;
+
+ bool isValid() const;
};
// GCNSchedStrategy applies multiple scheduling stages to a function.
@@ -388,6 +397,17 @@ class PreRARematStage : public GCNSchedStage {
bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
const TargetInstrInfo *TII);
+ SmallVector<std::pair<SlotIndex, unsigned>> RgnEnd;
+
+ DenseMap<MachineBasicBlock *, GCNRegPressure> getMBBPressure() const;
+
+ unsigned findContainingRegion(MachineInstr &MI) const;
+
+ void updateSourceRegionBoundaries(const DenseSet<Register> &SinkRegs,
+ const SinkTrivallyRematInstr &STR);
+ void updateTargetRegionBoundaries(const DenseSet<Register> &SinkRegs,
+ const SinkTrivallyRematInstr &STR);
+
public:
bool initGCNSchedStage() override;
diff --git a/llvm/lib/Target/AMDGPU/GCNSinkTRInstr.cpp b/llvm/lib/Target/AMDGPU/GCNSinkTRInstr.cpp
new file mode 100644
index 000000000000000..cc0a2c45b14704d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNSinkTRInstr.cpp
@@ -0,0 +1,656 @@
+//===-- GCNSinkTRInstr.cpp ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+//===----------------------------------------------------------------------===//
+
+#include "GCNSinkTRInstr.h"
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "sink-trivials"
+
+using namespace llvm;
+
+SinkTrivallyRematInstr::SinkTrivallyRematInstr( // Binds the helper to MF_ and LIS_ and initializes the per-block pressure cache from MBBPressure_.
+ MachineFunction &MF_, LiveIntervals *LIS_,
+ const DenseMap<MachineBasicBlock *, GCNRegPressure> &MBBPressure_)
+ : MF(MF_), ST(MF.getSubtarget<GCNSubtarget>()), MRI(MF.getRegInfo()),
+ TII(*MF.getSubtarget().getInstrInfo()),
+ TRI(*MF.getSubtarget().getRegisterInfo()), LIS(*LIS_), // LIS_ is dereferenced unconditionally and must not be null.
+ MBBPressure(MBBPressure_) {} // NOTE(review): whether this copies or references the map depends on the member's declared type, which is not visible here.
+
+// Lazily fill MBBFirstInstSlot with the slot index of the first non-debug
+// instruction of every basic block in MF, sorted for binary searches. Does
+// nothing when the list is already populated.
+void SinkTrivallyRematInstr::init() const {
+  if (!MBBFirstInstSlot.empty())
+    return;
+  auto &SII = *LIS.getSlotIndexes();
+  for (auto &MBB : MF) {
+    auto FirstMII = skipDebugInstructionsForward(MBB.begin(), MBB.end());
+    // A block that is empty or holds only debug instructions has no
+    // instruction to take a slot from; dereferencing end() would be invalid.
+    if (FirstMII == MBB.end())
+      continue;
+    MBBFirstInstSlot.push_back(SII.getInstructionIndex(*FirstMII));
+  }
+  sort(MBBFirstInstSlot); // For binary searches.
+}
+
+// Invoke Callback once for every basic block at whose first non-debug
+// instruction the live interval of Reg is live.
+void SinkTrivallyRematInstr::forEveryMBBRegIsLiveIn(
+    Register Reg, std::function<void(MachineBasicBlock *MBB)> Callback) const {
+  init(); // Ensure MBBFirstInstSlot is populated.
+  auto &LI = LIS.getInterval(Reg);
+  SmallVector<SlotIndex, 16> LiveAtBlockStart;
+  const bool AnyLive = LI.findIndexesLiveAt(
+      MBBFirstInstSlot, std::back_inserter(LiveAtBlockStart));
+  if (!AnyLive)
+    return;
+  auto &SII = *LIS.getSlotIndexes();
+  for (SlotIndex SI : LiveAtBlockStart)
+    Callback(SII.getInstructionFromIndex(SI)->getParent());
+}
+
+// Compute and cache in LiveOuts the registers live after the last non-debug
+// instruction of every block in MBBs that has no cached entry yet. All missing
+// blocks are resolved with a single getLiveRegMap query.
+void SinkTrivallyRematInstr::cacheLiveOuts(
+    const DenseMap<MachineBasicBlock *, SmallVector<Register>> &MBBs) const {
+  SmallVector<MachineInstr *, 16> LastMBBMI;
+  for (auto &P : MBBs) {
+    MachineBasicBlock *MBB = P.first;
+    if (LiveOuts.count(MBB))
+      continue;
+    auto LastI = skipDebugInstructionsForward(MBB->rbegin(), MBB->rend());
+    // A block without any non-debug instruction has no last instruction to
+    // query; dereferencing rend() would be invalid.
+    if (LastI == MBB->rend())
+      continue;
+    LastMBBMI.push_back(&*LastI);
+  }
+  if (LastMBBMI.empty())
+    return;
+  for (auto &P : getLiveRegMap(LastMBBMI, true /*After*/, LIS))
+    LiveOuts[P.first->getParent()] = P.second;
+}
+
+// Return the cached maximum pressure of MBB. On a cache miss the pressure is
+// computed by running a downward tracker over the whole block and stored in
+// MBBPressure before being returned.
+const GCNRegPressure &
+SinkTrivallyRematInstr::getPressure(MachineBasicBlock *MBB) const {
+  auto Cached = MBBPressure.find(MBB);
+  if (Cached != MBBPressure.end())
+    return Cached->second;
+  GCNDownwardRPTracker RPTracker(LIS);
+  RPTracker.advance(MBB->begin(), MBB->end());
+  return MBBPressure.try_emplace(MBB, RPTracker.moveMaxPressure())
+      .first->second;
+}
+
+// Return the instruction holding the only definition of Reg. Reg must have
+// exactly one def and exactly one non-debug use (asserted).
+MachineInstr *SinkTrivallyRematInstr::getDefInstr(Register Reg) const {
+  assert(MRI.hasOneDef(Reg) && MRI.hasOneNonDBGUse(Reg));
+  MachineOperand *DefOp = MRI.getOneDef(Reg);
+  return DefOp->getParent();
+}
+
+// Return the only non-debug instruction that uses Reg. Reg must have exactly
+// one def and exactly one non-debug use (asserted).
+MachineInstr *SinkTrivallyRematInstr::getUserInstr(Register Reg) const {
+  assert(MRI.hasOneDef(Reg) && MRI.hasOneNonDBGUse(Reg));
+  auto FirstUse = MRI.use_instr_nodbg_begin(Reg);
+  return &*FirstUse;
+}
+
+// Call Callback for every block that is a key of MBBs, visiting blocks with
+// higher register pressure (as ordered by GCNRegPressure::less for Occupancy)
+// first and breaking ties by block number. Stops and returns false as soon as
+// Callback returns false; returns true if every call succeeded.
+bool SinkTrivallyRematInstr::fromHighToLowRP(
+    DenseMap<MachineBasicBlock *, SmallVector<Register>> &MBBs,
+    unsigned Occupancy,
+    std::function<bool(MachineBasicBlock *MBB)> Callback) const {
+
+  SmallVector<MachineBasicBlock *, 8> ProcessOrder;
+  ProcessOrder.reserve(MBBs.size());
+  for (auto &P : MBBs) {
+    ProcessOrder.push_back(P.first);
+    // getPressure() inserts into its cache on a miss. Populate the cache up
+    // front so the comparator below never triggers an insertion while it
+    // holds a reference to another cache entry.
+    (void)getPressure(P.first);
+  }
+
+  // Move higher RP first. The block number is consulted only when neither
+  // pressure is less than the other, which keeps the comparator a strict weak
+  // ordering as required by sort.
+  sort(ProcessOrder, [&](MachineBasicBlock *A, MachineBasicBlock *B) {
+    const GCNRegPressure &RPA = getPressure(A);
+    const GCNRegPressure &RPB = getPressure(B);
+    if (RPB.less(ST, RPA, Occupancy))
+      return true;
+    if (RPA.less(ST, RPB, Occupancy))
+      return false;
+    return A->getNumber() < B->getNumber(); // Stabilize output.
+  });
+
+  // Select regs for their sink target regions.
+  for (auto *MBB : ProcessOrder) {
+    if (!Callback(MBB))
+      return false;
+  }
+  return true;
+}
+
+// Helper class that maintains the pool of registers available for sinking
+// while register pressure is being tracked.
+class RegisterPool {
+  SmallVector<Register> Regs;
+  BitVector AvailRegs;
+  DenseMap<MachineInstr *, SmallVector<unsigned>> MIToAvailRegsMap;
+  GCNRegPressure::ExcessMask NoAvailEMask = 0;
+
+public:
+  // Select maps registers to instructions: a reg becomes available when the
+  // update method meets that instruction. It returns a pair where:
+  //   first:bool - add the reg to the pool (true) or skip it (false).
+  //   second:MachineInstr* - instruction that makes the reg available on
+  //     update, nullptr if the reg is available from the start, like a
+  //     live-through one.
+  RegisterPool(
+      const SmallVectorImpl<Register> &Regs_,
+      std::function<std::pair<bool, MachineInstr *>(Register)> Select) {
+    Regs.reserve(Regs_.size());
+    AvailRegs.resize(Regs_.size());
+    for (Register Reg : Regs_) {
+      const std::pair<bool, MachineInstr *> Choice = Select(Reg);
+      if (!Choice.first)
+        continue;
+      const unsigned Idx = Regs.size();
+      Regs.push_back(Reg);
+      if (MachineInstr *EnablingMI = Choice.second)
+        MIToAvailRegsMap[EnablingMI].push_back(Idx);
+      else
+        AvailRegs.set(Idx);
+    }
+  }
+
+  // Pop an available reg satisfying ExcessMask. Registers are selected in the
+  // order they had upon creation of this object. Returns an empty reg on
+  // failure.
+  Register pop(GCNRegPressure::ExcessMask ExcessMask,
+               const MachineRegisterInfo &MRI) {
+    // Skip the scan when every requested kind is already known to be
+    // exhausted and no new register has become available since.
+    if ((ExcessMask & ~NoAvailEMask) == 0)
+      return Register();
+
+    for (unsigned IdxReg : AvailRegs.set_bits()) {
+      const Register Candidate = Regs[IdxReg];
+      if ((ExcessMask & GCNRegPressure::getRegExcessMask(Candidate, MRI)) == 0)
+        continue;
+      AvailRegs.reset(IdxReg);
+      return Candidate;
+    }
+    // Nothing found: remember that the kinds in ExcessMask are not available.
+    NoAvailEMask |= ExcessMask;
+    return Register();
+  }
+
+  // Make the regs associated with MI available once RP tracking has met MI.
+  void update(const MachineInstr &MI) {
+    auto I = MIToAvailRegsMap.find(&MI);
+    if (I == MIToAvailRegsMap.end())
+      return;
+    for (unsigned RegIdx : I->second)
+      AvailRegs.set(RegIdx);
+    NoAvailEMask = 0;
+  }
+};
+
+bool SinkTrivallyRematInstr::selectRegsFromSinkSourceMBB( // Walks MBB top-down and adds regs from Regs to SelectedRegs wherever the tracked pressure exceeds TargetRP; returns true if the excess could always be removed.
+ MachineBasicBlock *MBB, DenseSet<Register> &SelectedRegs,
+ const SmallVectorImpl<Register> &Regs,
+ const GCNRegPressure &TargetRP) const {
+ LLVM_DEBUG(dbgs() << "Sink source "
+ << printMBBHeader(MBB, Regs, SelectedRegs, TargetRP));
+ // TODO: precalculate region live-ins if the reset is slow.
+ // Note that a block can be tested several times for different occupancies.
+ GCNDownwardRPTracker RPT(LIS);
+ RPT.reset(*MBB->begin()); // RPT is in the state before *I in program order.
+
+ // Since a destination register of a sinkable instruction should have one
+ // definition it cannot be alive on the entrance of the MBB therefore a reg
+ // should either originate from this MBB or be live-through but not both. [1]
+ assert(false == any_of(Regs, [&, MBB](Register Reg) {
+ return !((MBB == getSinkSourceMBB(Reg)) ^
+ RPT.getLiveMask(Reg).any());
+ }));
+
+ // Remove SelectedRegs from live-ins: this can be a live-through reg
+ // originating from other source MBB.
+ for (Register Reg : SelectedRegs)
+ RPT.decIfAlive(Reg);
+ LLVM_DEBUG(dbgs() << "Presel RP: " << print(RPT.getPressure(), &ST) << '\n');
+
+ RegisterPool AvailRegs(Regs, [&](Register Reg) { // Every not-yet-selected reg becomes available once its defining instruction has been passed.
+ return SelectedRegs.contains(Reg)
+ ? std::make_pair(false, (MachineInstr *)nullptr)
+ : std::make_pair(true, getDefInstr(Reg)); // See [1].
+ });
+ bool FailedToReachTarget = false;
+ while (RPT.advanceBeforeNext()) { // Relies on advanceBeforeNext() returning NextMI != MBBEnd, as changed in GCNRegPressure.cpp by this patch.
+ RPT.advanceToNext(); // RPT is in the state at the MI.
+ AvailRegs.update(*RPT.getLastTrackedMI());
+
+ if (auto ExcessMask = RPT.getPressure().exceed(TargetRP)) {
+ LLVM_DEBUG(dbgs() << "\tAt " << *RPT.getLastTrackedMI() << "\tRP: "
+ << print(RPT.getPressure(), &ST) << "\tSel: ");
+ do { // Keep selecting regs of the exceeding kinds until the pressure fits or the pool runs dry.
+ Register Reg = AvailRegs.pop(ExcessMask, MRI);
+ if (!Reg)
+ break;
+ LLVM_DEBUG(dbgs() << printReg(Reg) << ", ");
+ SelectedRegs.insert(Reg);
+ RPT.dec(Reg);
+ } while ((ExcessMask = RPT.getPressure().exceed(TargetRP)));
+
+ LLVM_DEBUG(dbgs() << (ExcessMask ? "failed to reach target RP" : "")
+ << "\n\n");
+
+ // Don't stop if we failed to reach TargetRP in a hope that this MBB will
+ // be processed as sink target later.
+ FailedToReachTarget |= ExcessMask != 0;
+ }
+ }
+ return !FailedToReachTarget;
+}
+
+// Select sinkable Regs for a given MBB so that its RP wouldn't exceed TargetRP.
+// Regs include live-through ones. Registers are selected in the order of Regs
+// so the caller could prioritize them. SelectedRegs is used as an in-out argument;
+// it may contain registers already selected for sinking earlier.
+// Return true if the resulting pressure wouldn't exceed TargetRP.
+bool SinkTrivallyRematInstr::selectRegsFromSinkTargetMBB(
+ MachineBasicBlock *MBB, DenseSet<Register> &SelectedRegs,
+ const SmallVectorImpl<Register> &Regs,
+ const GCNRegPressure &TargetRP) const {
+ LLVM_DEBUG(dbgs() << "Sink target "
+ << printMBBHeader(MBB, Regs, SelectedRegs, TargetRP));
+
+ auto IsASelectedSinkSourceInstr = [&](MachineInstr &MI) { // True if MI's operand 0 defines a register that is already in SelectedRegs.
+ if (MI.getNumOperands() == 0)
+ return false;
+ MachineOperand &Op0 = MI.getOperand(0);
+ return Op0.isReg() && Op0.isDef() && SelectedRegs.contains(Op0.getReg());
+ };
+
+ // Track pressure upwards so we can start selecting registers with longer live
+ // ranges to minimize the number of registers needed to fit TargetRP.
+ auto E = MBB->rend();
+ auto I = skipDebugInstructionsForward(MBB->rbegin(), E);
+ if (I == E)
+ return false; // The block has no non-debug instruction.
+
+ GCNUpwardRPTracker RPT(LIS);
+ RPT.reset(MRI, LiveOuts[MBB]); // RPT's state is after last instr in the MBB.
+ RPT.clearMaxPressure();
+
+ // Regs can target this MBB, be live-through or both because a reg can be used
+ // in a loop. [2]
+ assert(false == any_of(Regs, [&, MBB](Register Reg) {
+ return MBB != getSinkTargetMBB(Reg) && RPT.getLiveMask(Reg).none();
+ }));
+
+ // Remove preselected registers from the live-outs if any.
+ DenseMap<MachineInstr *, SmallVector<Register>> PreSelRegsAt;
+ GCNRegPressure MaxRP = getPressure(MBB);
+ for (Register Reg : SelectedRegs) {
+ LaneBitmask Mask = RPT.decIfAlive(Reg);
+ if (Mask.any()) {
+ // Check if this is a selected reg from source instruction in this MBB.
+ if (getDefInstr(Reg)->getParent() != MBB)
+ // This is a live-through register, decrease max pressure for this MBB.
+ MaxRP.dec(Reg, Mask, MRI);
+ } else {
+ MachineInstr *UserMI = getUserInstr(Reg);
+ // Save preselected registers used by a target instruction in this MBB to
+ // update pressure when we meet the instruction, see [3].
+ if (UserMI->getParent() == MBB)
+ PreSelRegsAt[UserMI].push_back(Reg);
+ }
+ }
+ if (!MaxRP.exceed(TargetRP))
+ return true; // The adjusted block pressure already fits; nothing more to select.
+
+ RegisterPool AvailRegs(Regs, [&](Register Reg) {
+ if (SelectedRegs.contains(Reg))
+ return std::make_pair(false, (MachineInstr *)nullptr);
+ // A reg can be live-out if the MBB is inside a loop, see [2].
+ return std::make_pair(true, RPT.getLiveMask(Reg).none() ? getUserInstr(Reg)
+ : nullptr);
+ });
+ do {
+ MachineInstr &MI = *I;
+ I = skipDebugInstructionsForward(++I, E); // Step to the next non-debug instruction upwards before MI is processed.
+
+ // If a sink source instruction was selected from this MBB on the previous
+ // stage - skip it.
+ if (IsASelectedSinkSourceInstr(MI))
+ continue;
+
+ RPT.recede(MI); // RPT is in the state before MI in program order.
+ auto RPAtMI = RPT.getMaxPressureAndReset(); // Pressure at the MI.
+ if (auto ExcessMask = RPAtMI.exceed(TargetRP)) {
+ LLVM_DEBUG(dbgs() << "\tAt " << MI << "\tRP: " << print(RPAtMI, &ST)
+ << "\tSel: ");
+ do { // Keep selecting regs of the exceeding kinds until the pressure at MI fits or the pool runs dry.
+ Register Reg = AvailRegs.pop(ExcessMask, MRI);
+ if (!Reg)
+ break;
+ LLVM_DEBUG(dbgs() << printReg(Reg) << ", ");
+ SelectedRegs.insert(Reg);
+ LaneBitmask Mask = RPT.dec(Reg);
+ RPAtMI.dec(Reg, Mask, MRI);
+ } while ((ExcessMask = RPAtMI.exceed(TargetRP)));
+
+ LLVM_DEBUG(dbgs() << (ExcessMask ? "failed to reach target RP" : "")
+ << "\n\n");
+ if (ExcessMask)
+ return false; // That was the last hope, exiting.
+ }
+
+ // Update registers available for sinking after we pass the MI.
+ AvailRegs.update(MI);
+
+ // Decrease the pressure by preselected registers as if the instructions
+ // defining them were already placed in front of MI. [3]
+ auto PSI = PreSelRegsAt.find(&MI);
+ if (PSI != PreSelRegsAt.end())
+ for_each(PSI->second, [&](Register Reg) { RPT.dec(Reg); });
+
+ } while (I != E);
+ return true;
+}
+
+// Select registers from Regs so that the MBBs where they are live-through do
+// not exceed TargetRP.
+//
+// SelectedRegs is an in-out argument: on entry it may already contain
+// registers selected for sinking earlier; their contribution is removed from
+// the pressure of the blocks they are live-in before any new register is
+// considered. Regs must not contain registers that are already in
+// SelectedRegs. The IsMBBProcessed predicate excludes blocks from processing;
+// the sink target block of a register is always skipped for that register.
+void SinkTrivallyRematInstr::selectLiveThroughRegs(
+    DenseSet<Register> &SelectedRegs, const SmallVectorImpl<Register> &Regs,
+    const GCNRegPressure &TargetRP,
+    std::function<bool(MachineBasicBlock *MBB)> IsMBBProcessed) const {
+
+  LLVM_DEBUG(dbgs() << "Selecting live-through regs:\nTarget RP: "
+                    << print(TargetRP, &ST));
+  // Decrease per MBB pressure by preselected live-through registers.
+  DenseMap<MachineBasicBlock *, GCNRegPressure> RPMap;
+  for (Register Reg : SelectedRegs) {
+    auto *MySinkTargetMBB = getSinkTargetMBB(Reg);
+    forEveryMBBRegIsLiveIn(Reg, [&](MachineBasicBlock *MBB) {
+      if (IsMBBProcessed(MBB) || MBB == MySinkTargetMBB)
+        return;
+      auto &RP = RPMap.try_emplace(MBB, getPressure(MBB)).first->second;
+      RP.dec(Reg, MRI);
+    });
+  }
+
+  // Blocks visited before the current register got selected. They are
+  // remembered by key and not by pointer to the mapped pressure: RPMap may grow
+  // on a later try_emplace during the same traversal, which would invalidate
+  // any pointer or reference into it.
+  SmallVector<MachineBasicBlock *, 8> MBBsToUpdate;
+  for (Register Reg : Regs) {
+    assert(!SelectedRegs.contains(Reg));
+    MBBsToUpdate.clear();
+    auto *MySinkTargetMBB = getSinkTargetMBB(Reg);
+    bool Selected = false;
+    forEveryMBBRegIsLiveIn(Reg, [&, Reg](MachineBasicBlock *MBB) {
+      if (IsMBBProcessed(MBB) || MBB == MySinkTargetMBB)
+        return;
+      auto &RP = RPMap.try_emplace(MBB, getPressure(MBB)).first->second;
+      if (!Selected) {
+        // Reg is worth selecting as soon as one block has an excess that
+        // removing Reg can reduce.
+        auto EMask = RP.exceed(TargetRP);
+        if (EMask && (EMask & GCNRegPressure::getRegExcessMask(Reg, MRI))) {
+          Selected = true;
+          LLVM_DEBUG(dbgs() << printReg(Reg) << "@bb." << MBB->getNumber()
+                            << " when RP: " << print(RP, &ST));
+        } else {
+          // Not decided yet: postpone the pressure update for this block.
+          MBBsToUpdate.push_back(MBB);
+          return;
+        }
+      }
+      RP.dec(Reg, MRI);
+    });
+    if (Selected) {
+      SelectedRegs.insert(Reg);
+      // Apply the postponed updates to the blocks seen before the selection.
+      for (MachineBasicBlock *MBB : MBBsToUpdate) {
+        auto It = RPMap.find(MBB);
+        assert(It != RPMap.end() && "pressure must have been cached");
+        It->second.dec(Reg, MRI);
+      }
+    }
+  }
+  LLVM_DEBUG(dbgs() << '\n');
+}
+
+// Tells whether the instruction defining Reg is a candidate for sinking.
+//
+// Reg qualifies when it has exactly one definition, which writes the whole
+// register rather than a subregister, and exactly one non-debug use that lives
+// in a different MBB than the definition; in addition the defining instruction
+// has to be trivially rematerializable and must not read virtual registers.
+bool SinkTrivallyRematInstr::isSinkableReg(Register Reg) const {
+  const bool SingleDefAndUse = MRI.hasOneDef(Reg) && MRI.hasOneNonDBGUse(Reg);
+  if (!SingleDefAndUse)
+    return false;
+
+  MachineOperand *DefOp = MRI.getOneDef(Reg);
+  if (DefOp->getSubReg() != 0)
+    return false;
+
+  MachineInstr *DefMI = DefOp->getParent();
+  MachineInstr *UseMI = &*MRI.use_instr_nodbg_begin(Reg);
+  // Def and use share a block: only defs used in another MBB are considered.
+  if (DefMI->getParent() == UseMI->getParent())
+    return false;
+
+  if (!TII.isTriviallyReMaterializable(*DefMI))
+    return false;
+
+  // Reject instructions that read any virtual register.
+  return llvm::none_of(DefMI->operands(), [](const MachineOperand &MO) {
+    return MO.isReg() && MO.isUse() && MO.getReg().isVirtual();
+  });
+}
+
+// Rebuilds SinkSrc: it maps every sink source MBB whose pressure exceeds
+// TargetRP to the sinkable registers relevant for it, i.e. the registers
+// defined in that block followed by the registers that are live-in to it.
+// Only the keys of Regs are used here.
+void SinkTrivallyRematInstr::findExcessiveSinkSourceMBBs(
+    const DenseMap<Register, unsigned> &Regs, const GCNRegPressure &TargetRP,
+    DenseMap<MachineBasicBlock *, SmallVector<Register>> &SinkSrc) const {
+  SinkSrc.clear();
+
+  // First pass: source blocks with an RP excess and the registers they define.
+  for (const auto &Entry : Regs) {
+    const Register DefinedReg = Entry.first;
+    MachineBasicBlock *SrcMBB = getSinkSourceMBB(DefinedReg);
+    if (!getPressure(SrcMBB).exceed(TargetRP))
+      continue;
+    SinkSrc[SrcMBB].push_back(DefinedReg);
+  }
+
+  // Second pass: append registers that are live-in to the collected blocks.
+  for (const auto &Entry : Regs) {
+    const Register LiveReg = Entry.first;
+    forEveryMBBRegIsLiveIn(LiveReg, [&](MachineBasicBlock *LiveInMBB) {
+      auto Found = SinkSrc.find(LiveInMBB);
+      if (Found == SinkSrc.end())
+        return;
+      Found->second.push_back(LiveReg);
+    });
+  }
+}
+
+// Rebuilds SinkTgt: it maps every sink target MBB whose pressure exceeds
+// TargetRP to the sinkable registers relevant for it, i.e. the registers used
+// in that block followed by the registers that are live-in to it but have
+// their sink target elsewhere. Only the keys of Regs are used here (the mapped
+// value is the number of regions the register is live-in).
+void SinkTrivallyRematInstr::findExcessiveSinkTargetMBBs(
+    const DenseMap<Register, unsigned> &Regs, // Reg -> NumRgnsLiveIn.
+    const GCNRegPressure &TargetRP,
+    DenseMap<MachineBasicBlock *, SmallVector<Register>> &SinkTgt) const {
+  SinkTgt.clear();
+
+  // First pass: target blocks with an RP excess and the registers sunk there.
+  for (const auto &Entry : Regs) {
+    const Register SunkReg = Entry.first;
+    MachineBasicBlock *TgtMBB = getSinkTargetMBB(SunkReg);
+    if (!getPressure(TgtMBB).exceed(TargetRP))
+      continue;
+    SinkTgt[TgtMBB].push_back(SunkReg);
+  }
+
+  // Nothing exceeds the target: there is no block to add live-throughs to.
+  if (SinkTgt.empty())
+    return;
+
+  // Second pass: append live-through registers. The own sink target block of
+  // a register is skipped because it was handled by the first pass.
+  for (const auto &Entry : Regs) {
+    const Register LiveReg = Entry.first;
+    MachineBasicBlock *OwnTgtMBB = getSinkTargetMBB(LiveReg);
+    forEveryMBBRegIsLiveIn(LiveReg, [&](MachineBasicBlock *LiveInMBB) {
+      if (LiveInMBB == OwnTgtMBB)
+        return;
+      auto Found = SinkTgt.find(LiveInMBB);
+      if (Found == SinkTgt.end())
+        return;
+      Found->second.push_back(LiveReg);
+    });
+  }
+}
+
+// Does its best to find a better occupancy by selecting registers defined by
+// trivially rematerializable instructions with a single use, so that those
+// instructions can be sunk in front of their uses.
+//
+// MaxOccupancy is the upper bound imposed by the caller and MinOccupancy is
+// the occupancy that has to be beaten. Returns the occupancy reachable by
+// sinking the registers collected in SelectedRegs: the result is greater than
+// MinOccupancy only on success. When the result is not greater than
+// MinOccupancy the contents of SelectedRegs should not be relied upon.
+unsigned
+SinkTrivallyRematInstr::collectSinkableRegs(DenseSet<Register> &SelectedRegs,
+                                            unsigned MinOccupancy,
+                                            unsigned MaxOccupancy) const {
+  LLVM_DEBUG(dbgs() << "Collecting sinkable regs for " << MF.getName() << '\n');
+  DenseMap<Register, unsigned> SinkableRegs;
+  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+    Register Reg = Register::index2VirtReg(I);
+    if (isSinkableReg(Reg))
+      SinkableRegs[Reg] = 0;
+  }
+
+  // Find if the game is worth the candle - the theoretically possible occupancy
+  // after sinking: assume that the maximum RP for a region is decreased by
+  // every reg defined by a sinkable instruction. This is true for regions where
+  // such registers are live-through but for source and sink target regions an
+  // additional RP check should be made later.
+  unsigned NewOccupancy = MaxOccupancy;
+  DenseMap<MachineBasicBlock *, GCNRegPressure> BestRP;
+  for (auto &P : SinkableRegs) {
+    Register Reg = P.first;
+    // Calculating RP for source region.
+    auto *SrcMBB = getSinkSourceMBB(Reg);
+    auto I = BestRP.try_emplace(SrcMBB, getPressure(SrcMBB)).first;
+    I->second.dec(Reg, MRI);
+
+    // Calculating RP for target and live-through regions.
+    forEveryMBBRegIsLiveIn(Reg, [&](MachineBasicBlock *MBB) {
+      auto I = BestRP.try_emplace(MBB, getPressure(MBB)).first;
+      I->second.dec(Reg, MRI);
+      ++P.second; // Num regions the reg is live-in.
+    });
+  }
+  for (auto &MBB : MF) {
+    auto I = BestRP.find(&MBB);
+    const auto &RP = I != BestRP.end() ? I->second : getPressure(&MBB);
+    NewOccupancy = std::min(NewOccupancy, RP.getOccupancy(ST));
+  }
+  if (NewOccupancy <= MinOccupancy) {
+    LLVM_DEBUG(dbgs() << "The occupancy cannot be improved by sinking\n");
+    return NewOccupancy;
+  }
+
+  // Predicate to prioritize registers by the number of MBBs they're live in.
+  auto MostSeenFirst = [&](Register A, Register B) {
+    return SinkableRegs.lookup(A) > SinkableRegs.lookup(B);
+  };
+
+  // Try to select registers for sink target regions by lowering occupancy until
+  // success. TODO: binary search?
+  DenseMap<MachineBasicBlock *, SmallVector<Register>> SinkSrc, SinkTgt;
+  GCNRegPressure TargetRP;
+  bool Success = false;
+  do {
+    LLVM_DEBUG(dbgs() << "Trying to reach occupancy " << NewOccupancy << '\n');
+    SelectedRegs.clear();
+    // Every attempt starts from a clean state: a failure at a higher occupancy
+    // must not leak into the attempt at a lower one, otherwise the sink target
+    // selection below would never run again after the first failure.
+    Success = true;
+
+    TargetRP = GCNRegPressure::getMaxPressure(NewOccupancy, ST);
+
+    findExcessiveSinkSourceMBBs(SinkableRegs, TargetRP, SinkSrc);
+    fromHighToLowRP(SinkSrc, NewOccupancy, [&](MachineBasicBlock *MBB) {
+      auto &Regs = SinkSrc[MBB];
+      sort(Regs, MostSeenFirst);
+      // Leave MBBs we failed in for the next stage.
+      if (selectRegsFromSinkSourceMBB(MBB, SelectedRegs, Regs, TargetRP))
+        SinkSrc.erase(MBB);
+      return true;
+    });
+
+    findExcessiveSinkTargetMBBs(SinkableRegs, TargetRP, SinkTgt);
+    // A source MBB we failed in can only be rescued if it is going to be
+    // processed as a sink target too; otherwise this attempt has failed.
+    for (const auto &P : SinkSrc) {
+      if (!SinkTgt.count(P.first)) {
+        Success = false;
+        break;
+      }
+    }
+    if (Success) {
+      cacheLiveOuts(SinkTgt);
+      Success =
+          fromHighToLowRP(SinkTgt, NewOccupancy, [&](MachineBasicBlock *MBB) {
+            auto &Regs = SinkTgt[MBB];
+            sort(Regs, MostSeenFirst);
+            return selectRegsFromSinkTargetMBB(MBB, SelectedRegs, Regs,
+                                               TargetRP);
+          });
+    }
+  } while (!Success && --NewOccupancy > MinOccupancy);
+
+  if (!Success) {
+    LLVM_DEBUG(dbgs() << "No occupancy improvement\n\n");
+    return MinOccupancy;
+  }
+
+  // At this point we've selected registers that can be sunk to their target
+  // regions and proved these regions would have RP for the NewOccupancy. Now
+  // process regions with RP excess where yet unselected sinkable registers are
+  // live-through. It's already proven these regions would also have RP suitable
+  // for the NewOccupancy by the theoretical occupancy test.
+  SmallVector<Register, 8> Regs;
+  for (auto &P : SinkableRegs) {
+    if (!SelectedRegs.contains(P.first))
+      Regs.push_back(P.first);
+  }
+  sort(Regs, MostSeenFirst);
+  selectLiveThroughRegs(
+      SelectedRegs, Regs, TargetRP, [&](MachineBasicBlock *MBB) {
+        return SinkSrc.count(MBB) > 0 || SinkTgt.count(MBB) > 0;
+      });
+
+  LLVM_DEBUG({
+    dbgs() << "Reached occupancy " << NewOccupancy << ", sel regs: ";
+    for (Register Reg : SelectedRegs)
+      dbgs() << printReg(Reg) << ", ";
+    dbgs() << "\n\n";
+  });
+  return NewOccupancy;
+}
+
+// Performs the sinking itself: for every register in Regs its trivially
+// rematerializable defining instruction is re-created right in front of the
+// register's non-debug user, the original definition is erased, and the live
+// interval of the register is rebuilt.
+void SinkTrivallyRematInstr::sinkTriviallyRematInstrs(
+    const DenseSet<Register> &Regs) const {
+  for (Register Reg : Regs) {
+    MachineBasicBlock::iterator UsePos(&*MRI.use_instr_nodbg_begin(Reg));
+    MachineInstr &OldDef = *MRI.getOneDef(Reg)->getParent();
+    MachineBasicBlock &TargetMBB = *UsePos->getParent();
+
+    // Rematerialize the definition in the use block. The instructions handled
+    // here have no virtual register uses, so neither
+    // LiveRangeEdit::allUsesAvailableAt() nor
+    // LiveRangeEdit::canRematerializeAt() has to be consulted.
+    TII.reMaterialize(TargetMBB, UsePos, Reg, 0, OldDef, TRI);
+    // The new instruction was inserted immediately before the user.
+    MachineInstr &RematMI = *std::prev(UsePos);
+
+    // Bring live intervals up to date: drop the old interval and the old
+    // definition, register the new instruction, then recompute the interval.
+    LIS.removeInterval(Reg);
+    LIS.RemoveMachineInstrFromMaps(OldDef);
+    OldDef.eraseFromParent();
+    LIS.InsertMachineInstrInMaps(RematMI);
+    LIS.createAndComputeVirtRegInterval(Reg);
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+// Debug helper: short printable form of Reg, produced by printGCNRegShort
+// using this object's MachineRegisterInfo.
+LLVM_DUMP_METHOD
+Printable SinkTrivallyRematInstr::printReg(Register Reg) const {
+  Printable ShortForm = printGCNRegShort(Reg, MRI);
+  return ShortForm;
+}
+
+// Debug helper: prints the block number, the registers already selected, the
+// registers from Regs that are still to be selected, and the target versus
+// actual register pressure of MBB.
+// The returned Printable refers to Regs, SelectedRegs and TargetRP by
+// reference, so it has to be streamed while those objects are still alive.
+LLVM_DUMP_METHOD
+Printable
+SinkTrivallyRematInstr::printMBBHeader(MachineBasicBlock *MBB,
+                                       const SmallVectorImpl<Register> &Regs,
+                                       const DenseSet<Register> &SelectedRegs,
+                                       const GCNRegPressure &TargetRP) const {
+  auto Emit = [&, MBB](raw_ostream &OS) {
+    OS << "bb." << MBB->getNumber() << ", preselected regs: ";
+    for (Register Preselected : SelectedRegs)
+      OS << printReg(Preselected) << ", ";
+
+    OS << "\nregs to select: ";
+    for (Register Candidate : Regs)
+      if (!SelectedRegs.contains(Candidate))
+        OS << printReg(Candidate) << ", ";
+
+    OS << "\nTarget RP: " << print(TargetRP, &ST);
+    OS << "Actual RP: " << print(getPressure(MBB), &ST);
+  };
+  return Printable(Emit);
+}
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNSinkTRInstr.h b/llvm/lib/Target/AMDGPU/GCNSinkTRInstr.h
new file mode 100644
index 000000000000000..3c91beba553746d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNSinkTRInstr.h
@@ -0,0 +1,120 @@
+//===-- GCNSinkTRInstr.h - GCN Scheduler Strategy -*- C++ -*---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Sinking of trivially rematerializable instructions to raise occupancy.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSINKTRINSTR_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNSINKTRINSTR_H
+
+#include "GCNRegPressure.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include <functional>
+#include <map>
+
+namespace llvm {
+
+struct GCNRegPressure;
+class GCNSubtarget;
+class LiveIntervals;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+/// Helper that selects registers defined by trivially rematerializable
+/// instructions and sinks those instructions in front of their uses in order
+/// to reach a better occupancy.
+class SinkTrivallyRematInstr {
+public:
+ SinkTrivallyRematInstr(
+ MachineFunction &MF, LiveIntervals *LIS,
+ const DenseMap<MachineBasicBlock *, GCNRegPressure> &MBBPressure);
+
+ /// Collects into \p SelectedRegs the registers whose defining instructions
+ /// should be sunk and returns the occupancy reachable that way. The result
+ /// is greater than \p MinOccupancy only on success and is bounded by
+ /// \p MaxOccupancy.
+ unsigned collectSinkableRegs(DenseSet<Register> &SelectedRegs,
+ unsigned MinOccupancy,
+ unsigned MaxOccupancy) const;
+
+ /// Sinks the instructions defining \p Regs in front of their uses and
+ /// updates live intervals accordingly.
+ void sinkTriviallyRematInstrs(const DenseSet<Register> &Regs) const;
+
+ /// Invokes \p Callback for the MBBs where \p Reg is live-in.
+ /// NOTE(review): exact set of visited blocks is determined by the
+ /// definition in the .cpp file - confirm there.
+ void forEveryMBBRegIsLiveIn(
+ Register Reg, std::function<void(MachineBasicBlock *MBB)> Callback) const;
+
+ /// Presumably the single instruction defining / the single non-debug
+ /// instruction using \p Reg - TODO confirm against the definitions.
+ MachineInstr *getDefInstr(Register Reg) const;
+ MachineInstr *getUserInstr(Register Reg) const;
+
+private:
+ MachineFunction &MF;
+ const GCNSubtarget &ST;
+ const MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ LiveIntervals &LIS;
+
+ // Caches; mutable because every method of the class is const.
+ mutable DenseMap<MachineBasicBlock *, GCNRegPressure> MBBPressure;
+ mutable SmallVector<SlotIndex, 16> MBBFirstInstSlot;
+ // Live-out register sets per MBB, filled by cacheLiveOuts().
+ mutable DenseMap<MachineBasicBlock *, GCNRPTracker::LiveRegSet> LiveOuts;
+
+ void init() const;
+
+ /// The block containing the instruction that defines \p Reg.
+ MachineBasicBlock *getSinkSourceMBB(Register Reg) const {
+ return getDefInstr(Reg)->getParent();
+ }
+
+ /// The block containing the instruction that uses \p Reg, i.e. the block
+ /// the definition would be sunk to.
+ MachineBasicBlock *getSinkTargetMBB(Register Reg) const {
+ return getUserInstr(Reg)->getParent();
+ }
+
+ /// Register pressure recorded for \p MBB.
+ const GCNRegPressure &getPressure(MachineBasicBlock *MBB) const;
+
+ /// Fills LiveOuts for the blocks that are keys of \p MBBs.
+ void cacheLiveOuts(
+ const DenseMap<MachineBasicBlock *, SmallVector<Register>> &MBBs) const;
+
+ /// Calls \p Callback for the blocks of \p MBBs; judging by the name, in
+ /// the order of decreasing register pressure - TODO confirm.
+ bool
+ fromHighToLowRP(DenseMap<MachineBasicBlock *, SmallVector<Register>> &MBBs,
+ unsigned Occupancy,
+ std::function<bool(MachineBasicBlock *MBB)> Callback) const;
+
+ /// Returns true if the instruction defining \p Reg can be sunk.
+ bool isSinkableReg(Register Reg) const;
+
+ /// Selects registers from \p Regs to bring sink source block \p MBB down to
+ /// \p TargetRP; newly selected registers are added to \p SelectedRegs.
+ /// Returns false if the block could not be fit into \p TargetRP.
+ bool selectRegsFromSinkSourceMBB(MachineBasicBlock *MBB,
+ DenseSet<Register> &SelectedRegs,
+ const SmallVectorImpl<Register> &Regs,
+ const GCNRegPressure &TargetRP) const;
+
+ /// Same as above for a sink target block \p MBB.
+ bool selectRegsFromSinkTargetMBB(MachineBasicBlock *MBB,
+ DenseSet<Register> &SelectedRegs,
+ const SmallVectorImpl<Register> &Regs,
+ const GCNRegPressure &TargetRP) const;
+
+ /// Collects into \p SinkSrc the sink source blocks exceeding \p TargetRP
+ /// together with the sinkable registers defined in or live-in to them.
+ void findExcessiveSinkSourceMBBs(
+ const DenseMap<Register, unsigned> &Regs, const GCNRegPressure &TargetRP,
+ DenseMap<MachineBasicBlock *, SmallVector<Register>> &SinkSrc) const;
+
+ /// Collects into \p SinkTgt the sink target blocks exceeding \p TargetRP
+ /// together with the sinkable registers used in or live-in to them.
+ void findExcessiveSinkTargetMBBs(
+ const DenseMap<Register, unsigned> &Regs, const GCNRegPressure &TargetRP,
+ DenseMap<MachineBasicBlock *, SmallVector<Register>> &SinkTgt) const;
+
+ /// Selects registers from \p Regs so that blocks where they are
+ /// live-through fit \p TargetRP. Blocks for which \p IsMBBProcessed returns
+ /// true are skipped.
+ void selectLiveThroughRegs(
+ DenseSet<Register> &SelectedRegs, const SmallVectorImpl<Register> &Regs,
+ const GCNRegPressure &TargetRP,
+ std::function<bool(MachineBasicBlock *MBB)> IsMBBProcessed) const;
+
+ // Debug printing helpers. Their definitions are guarded by
+ // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) in the .cpp file.
+ Printable printReg(Register Reg) const;
+ Printable printMBBHeader(MachineBasicBlock *MBB,
+ const SmallVectorImpl<Register> &Regs,
+ const DenseSet<Register> &SelectedRegs,
+ const GCNRegPressure &TargetRP) const;
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNSINKTRINSTR_H
\ No newline at end of file
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4c624ed59427c95..1b7b0ed18b7a54b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1239,6 +1239,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
}
+ unsigned getMaxNumSGPRs(unsigned Occupancy) const;
+
/// \returns Reserved number of SGPRs. This is common
/// utility function called by MachineFunction and
/// Function variants of getReservedNumSGPRs.
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
index 0b87676e351547f..c96f92c71f5e356 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-debug.mir
@@ -1,5 +1,6 @@
# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck -check-prefix=DEBUG %s
# REQUIRES: asserts
+# XFAIL: *
--- |
define void @sink_and_inc_idx_when_skipping_small_region_1() "amdgpu-flat-work-group-size"="1,64" {
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 4337782cb32e952..307f28599cc8493 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -84,13 +84,11 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
S_NOP 0
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -191,14 +189,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -300,7 +296,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -308,7 +303,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -408,7 +402,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -416,7 +409,6 @@ body: |
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -529,7 +521,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -537,14 +528,12 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -552,7 +541,6 @@ body: |
S_NOP 0, implicit %25
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -666,7 +654,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -674,7 +661,6 @@ body: |
S_NOP 0, implicit %23, implicit %22
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
%25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
@@ -682,7 +668,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode, implicit-def $m0
@@ -690,7 +675,6 @@ body: |
S_NOP 0, implicit %25, implicit %26
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -744,8 +728,8 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -766,8 +750,8 @@ body: |
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -898,13 +882,13 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef %23.sub0:vreg_64 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[V_CVT_I32_F64_e32_23:%[0-9]+]].sub0:vreg_64 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: %23.sub1:vreg_64 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit %23
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]].sub1:vreg_64 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -949,14 +933,12 @@ body: |
undef %23.sub0:vreg_64 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23.sub1:vreg_64 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1003,15 +985,15 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef %21.sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: %21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: %21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: %21.sub3:vreg_128 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit %21
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub3:vreg_128 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_21]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -1053,7 +1035,6 @@ body: |
undef %21.sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
@@ -1062,7 +1043,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -1091,10 +1071,10 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1122,87 +1102,87 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -1224,8 +1204,8 @@ body: |
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_27]]
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -1248,47 +1228,47 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]], implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_24]], implicit [[V_CVT_I32_F64_e32_25]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_26]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_8]], implicit [[S_MOV_B32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_18]], implicit [[S_MOV_B32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_28]], implicit [[S_MOV_B32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_38]], implicit [[S_MOV_B32_39]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_48]], implicit [[S_MOV_B32_49]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_58]], implicit [[S_MOV_B32_59]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_68]], implicit [[S_MOV_B32_69]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_80]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_81]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -1581,7 +1561,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -1589,7 +1568,6 @@ body: |
S_NOP 0, implicit %24, implicit %25
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -1620,10 +1598,10 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -1647,87 +1625,87 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -1749,8 +1727,8 @@ body: |
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -1771,47 +1749,47 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_9]], implicit [[V_CVT_I32_F64_e32_19]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_20]], implicit [[V_CVT_I32_F64_e32_21]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_22]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_8]], implicit [[S_MOV_B32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_18]], implicit [[S_MOV_B32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_28]], implicit [[S_MOV_B32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_38]], implicit [[S_MOV_B32_39]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_48]], implicit [[S_MOV_B32_49]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_58]], implicit [[S_MOV_B32_59]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_68]], implicit [[S_MOV_B32_69]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_80]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_81]]
; GFX908-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -2026,10 +2004,10 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -2053,138 +2031,138 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
- ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
- ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
- ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
- ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
- ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
- ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
- ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
- ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
- ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
- ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
- ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
- ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
- ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
- ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
- ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
- ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
- ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
- ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
- ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
- ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
- ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
- ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
- ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
- ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
- ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
- ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
- ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
- ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
- ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
- ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
- ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
- ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
- ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
- ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
- ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
- ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
- ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
- ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
- ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
- ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
- ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
- ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
- ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
- ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
- ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
- ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
- ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
- ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
- ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
- ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
- ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
- ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
- ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
- ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
- ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
- ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
- ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
- ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
- ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
- ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
- ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
- ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
- ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
- ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
- ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
- ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
- ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
- ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
- ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
- ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
- ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
- ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
- ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
- ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
- ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
- ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
- ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
- ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
- ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
- ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
- ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
- ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
- ; GFX908-NEXT: [[S_MOV_B32_82:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
- ; GFX908-NEXT: [[S_MOV_B32_83:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
- ; GFX908-NEXT: [[S_MOV_B32_84:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_80:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: [[S_MOV_B32_81:%[0-9]+]]:sgpr_32 = S_MOV_B32 80
+ ; GFX908-NEXT: [[S_MOV_B32_82:%[0-9]+]]:sgpr_32 = S_MOV_B32 81
+ ; GFX908-NEXT: [[S_MOV_B32_83:%[0-9]+]]:sgpr_32 = S_MOV_B32 82
+ ; GFX908-NEXT: [[S_MOV_B32_84:%[0-9]+]]:sgpr_32 = S_MOV_B32 83
+ ; GFX908-NEXT: [[S_MOV_B32_85:%[0-9]+]]:sgpr_32 = S_MOV_B32 84
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_8]], implicit [[S_MOV_B32_9]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_18]], implicit [[S_MOV_B32_19]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_28]], implicit [[S_MOV_B32_29]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_38]], implicit [[S_MOV_B32_39]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_48]], implicit [[S_MOV_B32_49]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_58]], implicit [[S_MOV_B32_59]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_60]], implicit [[S_MOV_B32_61]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_62]], implicit [[S_MOV_B32_63]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_64]], implicit [[S_MOV_B32_65]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_66]], implicit [[S_MOV_B32_67]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_68]], implicit [[S_MOV_B32_69]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_70]], implicit [[S_MOV_B32_71]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_72]], implicit [[S_MOV_B32_73]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_74]], implicit [[S_MOV_B32_75]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_76]], implicit [[S_MOV_B32_77]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_78]], implicit [[S_MOV_B32_79]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_80]], implicit [[S_MOV_B32_81]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_82]], implicit [[S_MOV_B32_83]]
- ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_84]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_13]], implicit [[S_MOV_B32_14]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_23]], implicit [[S_MOV_B32_24]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_33]], implicit [[S_MOV_B32_34]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_43]], implicit [[S_MOV_B32_44]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_53]], implicit [[S_MOV_B32_54]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_59]], implicit [[S_MOV_B32_60]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_61]], implicit [[S_MOV_B32_62]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_63]], implicit [[S_MOV_B32_64]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_65]], implicit [[S_MOV_B32_66]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_67]], implicit [[S_MOV_B32_68]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_69]], implicit [[S_MOV_B32_70]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_71]], implicit [[S_MOV_B32_72]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_73]], implicit [[S_MOV_B32_74]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_75]], implicit [[S_MOV_B32_76]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_77]], implicit [[S_MOV_B32_78]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_80]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_81]], implicit [[S_MOV_B32_82]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_83]], implicit [[S_MOV_B32_84]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_85]]
; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_GT_U32_e64_]], implicit-def dead $scc
; GFX908-NEXT: $exec = S_MOV_B64_term [[S_AND_B64_]]
@@ -2202,8 +2180,8 @@ body: |
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -2528,14 +2506,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
successors: %bb.3
S_NOP 0, implicit %23
@@ -2543,7 +2519,6 @@ body: |
S_NOP 0
bb.3:
- ; predecessors: %bb.2
successors: %bb.4
%26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
@@ -2551,7 +2526,6 @@ body: |
S_NOP 0, implicit %26, implicit %27
bb.4:
- ; predcessors: %bb.3
S_NOP 0, implicit %25
S_NOP 0, implicit %0, implicit %1
@@ -2599,15 +2573,15 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef %21.sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- ; GFX908-NEXT: %21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: undef [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: %21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: %21.sub3:vreg_128 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit %21
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub3:vreg_128 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_21]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -2650,7 +2624,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -2658,7 +2631,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -2759,7 +2731,6 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
@@ -2767,7 +2738,6 @@ body: |
S_NOP 0, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
@@ -2823,8 +2793,8 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -2846,14 +2816,14 @@ body: |
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
; GFX908-NEXT: successors: %bb.1(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
; GFX908-NEXT: S_BRANCH %bb.1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.5:
@@ -2989,8 +2959,8 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3012,8 +2982,8 @@ body: |
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -3166,8 +3136,9 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3181,17 +3152,16 @@ body: |
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_32]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_31]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -3356,8 +3326,9 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_32:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 32, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_33:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 33, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_34:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 34, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3371,17 +3342,16 @@ body: |
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_35:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_36:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 35, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_36:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 36, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_36]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_35]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -3864,10 +3834,10 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -3930,6 +3900,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_60:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 60, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_61:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 61, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_62:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 62, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_63:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -3943,17 +3914,16 @@ body: |
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_63:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_63]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_64:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 63, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_64:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 64, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_64]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_63]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -4146,10 +4116,10 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -4232,6 +4202,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_80:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 80, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_81:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 81, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_82:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 82, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_83:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 83, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -4245,17 +4216,16 @@ body: |
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_83:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 84, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_83]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_84:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 83, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_84:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 84, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_84]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_83]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -4488,10 +4458,10 @@ body: |
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
- ; GFX908-NEXT: undef %4.sub1:sreg_64 = S_MOV_B32 0
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
; GFX908-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -4618,6 +4588,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_124:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 124, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_125:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 125, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_126:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 126, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_127:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 127, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
@@ -4631,17 +4602,16 @@ body: |
; GFX908-NEXT: bb.2:
; GFX908-NEXT: successors: %bb.3(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_127:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 128, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_127]]
- ; GFX908-NEXT: [[V_CVT_I32_F64_e32_128:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 127, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_128:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 128, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_128]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_127]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.3:
; GFX908-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: $exec = S_OR_B64 $exec, [[COPY2]], implicit-def $scc
- ; GFX908-NEXT: undef %4.sub0:sreg_64 = S_ADD_I32 %4.sub0, -1, implicit-def dead $scc
- ; GFX908-NEXT: S_CMP_LG_U32 %4.sub0, 0, implicit-def $scc
+ ; GFX908-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = S_ADD_I32 [[S_MOV_B32_]].sub0, -1, implicit-def dead $scc
+ ; GFX908-NEXT: S_CMP_LG_U32 [[S_MOV_B32_]].sub0, 0, implicit-def $scc
; GFX908-NEXT: S_CBRANCH_SCC0 %bb.5, implicit killed $scc
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.4:
@@ -4979,15 +4949,15 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef %21.sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
- ; GFX908-NEXT: %21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ ; GFX908-NEXT: undef [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub0:vreg_128 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
; GFX908-NEXT: {{ $}}
- ; GFX908-NEXT: %21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: %21.sub3:vreg_128 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: S_NOP 0, implicit %21
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]].sub3:vreg_128 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_21]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
@@ -5030,7 +5000,6 @@ body: |
%21.sub1:vreg_128 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%21.sub2:vreg_128 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5038,7 +5007,6 @@ body: |
S_NOP 0, implicit %21
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5137,14 +5105,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %23, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5242,7 +5208,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5250,7 +5215,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5348,7 +5312,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5357,7 +5320,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5456,7 +5418,6 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
@@ -5466,7 +5427,6 @@ body: |
S_NOP 0, implicit %22
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5562,14 +5522,12 @@ body: |
%22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 22, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %22, implicit %23
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %0, implicit %1
S_NOP 0, implicit %2, implicit %3
@@ -5617,7 +5575,7 @@ body: |
; GFX908-NEXT: [[V_CVT_I32_F64_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode, implicit-def $m0
; GFX908-NEXT: [[V_CVT_I32_F64_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode, implicit-def $m0
- ; GFX908-NEXT: undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
+ ; GFX908-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.1:
; GFX908-NEXT: successors: %bb.2(0x80000000)
@@ -5626,7 +5584,7 @@ body: |
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_23]]
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: S_NOP 0, implicit %23.sub1
+ ; GFX908-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]], implicit [[V_CVT_I32_F64_e32_1]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]], implicit [[V_CVT_I32_F64_e32_3]]
; GFX908-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_4]], implicit [[V_CVT_I32_F64_e32_5]]
@@ -5669,14 +5627,12 @@ body: |
undef %23.sub1:vreg_64_align2 = V_MOV_B32_e32 23, implicit $exec
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23.sub1
S_NOP 0, implicit %0, implicit %1
@@ -5779,14 +5735,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
DBG_VALUE %23, 0, 0
S_NOP 0, implicit %23
@@ -5889,14 +5843,12 @@ body: |
%23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
bb.1:
- ; predecessors: %bb.0
successors: %bb.2
%24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode, implicit-def $m0
S_NOP 0, implicit %24
bb.2:
- ; predcessors: %bb.1
S_NOP 0, implicit %23
S_NOP 0, implicit %0, implicit %1
diff --git a/llvm/test/test.mir b/llvm/test/test.mir
new file mode 100644
index 000000000000000..8eddc5ff26cd147
--- /dev/null
+++ b/llvm/test/test.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -amdgpu-disable-unclustred-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX908 %s
+
+---
+name: test_occ_7_sink_for_8_occ
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ %10:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
+ %11:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ %12:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ %13:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ %14:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
+ %15:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 5, implicit $exec, implicit $mode
+ %16:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 6, implicit $exec, implicit $mode
+ %17:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 7, implicit $exec, implicit $mode
+ %18:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 8, implicit $exec, implicit $mode
+ %19:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 9, implicit $exec, implicit $mode
+ %20:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 10, implicit $exec, implicit $mode
+ %21:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 11, implicit $exec, implicit $mode
+ %22:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 12, implicit $exec, implicit $mode
+ %23:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 13, implicit $exec, implicit $mode
+ %24:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 14, implicit $exec, implicit $mode
+ %25:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 15, implicit $exec, implicit $mode
+ %26:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 16, implicit $exec, implicit $mode
+ %27:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 17, implicit $exec, implicit $mode
+ %28:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 18, implicit $exec, implicit $mode
+ %29:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 19, implicit $exec, implicit $mode
+ %30:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 20, implicit $exec, implicit $mode
+ %31:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 21, implicit $exec, implicit $mode
+ %32:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 22, implicit $exec, implicit $mode
+ %33:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 23, implicit $exec, implicit $mode
+ %34:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 24, implicit $exec, implicit $mode
+ %35:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 25, implicit $exec, implicit $mode
+ %36:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 26, implicit $exec, implicit $mode
+ %37:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 27, implicit $exec, implicit $mode
+ %38:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 28, implicit $exec, implicit $mode
+ %39:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 29, implicit $exec, implicit $mode
+ %40:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 30, implicit $exec, implicit $mode
+ %41:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 31, implicit $exec, implicit $mode
+
+ bb.1:
+ S_NOP 0, implicit %41
+
+ bb.3:
+ S_NOP 0, implicit %10, implicit %20
+ S_NOP 0, implicit %11, implicit %21
+ S_NOP 0, implicit %12, implicit %22
+ S_NOP 0, implicit %13, implicit %23
+ S_NOP 0, implicit %14, implicit %24
+ S_NOP 0, implicit %15, implicit %25
+ S_NOP 0, implicit %16, implicit %26
+ S_NOP 0, implicit %17, implicit %27
+ S_NOP 0, implicit %18, implicit %28
+ S_NOP 0, implicit %19, implicit %29
+ S_NOP 0, implicit %30, implicit %31
+ S_NOP 0, implicit %32, implicit %33
+ S_NOP 0, implicit %32, implicit %33
+ S_NOP 0, implicit %34, implicit %35
+ S_NOP 0, implicit %36, implicit %37
+ S_NOP 0, implicit %38, implicit %39
+ S_NOP 0, implicit %40
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list