[llvm] [AMDGPU] Physical register tracking in GCN trackers. (PR #177223)
Dhruva Chakrabarti via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 21 10:53:58 PST 2026
https://github.com/dhruvachak updated https://github.com/llvm/llvm-project/pull/177223
>From 63586fbd6117f7ccf1aff555801e63eb6653dc9a Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 20 Jan 2026 14:32:55 -0600
Subject: [PATCH 1/6] [AMDGPU] Physical register tracking in GCN trackers.
Previously, the GCN tracker only monitored virtual registers, leading to
inaccurate register pressure estimates and sub-optimal scheduling decisions
when physical registers were present. This patch adds support for tracking
physical registers in GCN trackers. Virtual and physical register tracking
are maintained separately. Similar to virtual LiveRegs, physical LiveRegs
are now maintained. Special register filtering has been implemented to
match the behavior of the existing generic trackers. The tracking flow
closely follows that of the GCN trackers in the recede, advance, and
pressure-increment methods.
Tracking physical registers leads to better register allocation, avoids
register allocation failures, and gives more accurate pressure estimates.
Existing tests have been updated to reflect these changes. A new test,
schedule-gcn-physreg-pressure, has been added to validate physical register
tracking across multiple scenarios.
Assisted-by: Cursor
---
llvm/include/llvm/CodeGen/RegisterPressure.h | 14 +
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 381 +++-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 86 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 18 +-
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 1 +
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 120 ++
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 11 +
.../machine-scheduler-sink-trivial-remats.mir | 94 +-
.../AMDGPU/materialize-frame-index-sgpr.ll | 1558 +++++++++++++++++
.../schedule-amdgpu-tracker-physreg-crash.ll | 10 +-
.../AMDGPU/schedule-amdgpu-tracker-physreg.ll | 32 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 364 ++++
13 files changed, 2550 insertions(+), 141 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index 7485be6dcb351..01a944f386014 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -293,6 +293,20 @@ class LiveRegSet {
}
public:
+ LiveRegSet() = default;
+
+ // Copy assignment operator - copies live register contents.
+ // Note: Both LiveRegSets must have been initialized with init() first.
+ LiveRegSet &operator=(const LiveRegSet &Other) {
+ if (this != &Other) {
+ NumRegUnits = Other.NumRegUnits;
+ Regs.clear();
+ for (const IndexMaskPair &Pair : Other.Regs)
+ Regs.insert(Pair);
+ }
+ return *this;
+ }
+
LLVM_ABI void clear();
LLVM_ABI void init(const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index f253a841f16a6..2e6b399dd3ff4 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -242,6 +242,7 @@ GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
, Context(C)
, Strategy(S)
, UPTracker(*LIS) {
+ UPTracker.initPhysLiveRegs(Context->MF->getRegInfo());
}
// returns max pressure for a region
@@ -282,6 +283,7 @@ GCNIterativeScheduler::getSchedulePressure(const Region &R,
Range &&Schedule) const {
auto const BBEnd = R.Begin->getParent()->end();
GCNUpwardRPTracker RPTracker(*LIS);
+ RPTracker.initPhysLiveRegs(MF.getRegInfo());
if (R.End != BBEnd) {
// R.End points to the boundary instruction but the
// schedule doesn't include it
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 89307ef9767b7..9f2bc73aca4ad 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -64,9 +64,21 @@ void GCNRegPressure::inc(unsigned Reg,
assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
"prev mask should always be lesser than new");
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ const TargetRegisterClass *RC;
+ if (Register(Reg).isVirtual()) {
+ RC = MRI.getRegClass(Reg);
+ } else {
+ // For physical registers, skip non-allocatable registers (reserved,
+ // special, etc.).
+ if (!MRI.isAllocatable(Reg))
+ return;
+ // For physical registers, get the minimal register class.
+ RC = TRI->getMinimalPhysRegClass(Reg);
+ if (!RC)
+ return;
+ }
unsigned RegKind = getRegKind(RC, STI);
if (TRI->getRegSizeInBits(*RC) != 32) {
// Reg is from a tuple register class.
@@ -447,6 +459,28 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
}
+// Helper to get the physical register that owns a register unit.
+Register GCNRPTracker::getPhysRegFromUnit(MCRegUnit Unit) const {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+
+ // Return the first allocatable root register of this unit.
+ for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
+ Register Reg(*RI);
+ if (MRI->isAllocatable(Reg))
+ return Reg;
+ }
+ return Register();
+}
+
+// Helper to check if a register unit is live at a given slot index.
+bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
+ const LiveRange *LR = LIS.getCachedRegUnit(Unit);
+ if (!LR)
+ return false;
+ return LR->liveAt(SI);
+}
+
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter) {
@@ -468,7 +502,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
const LiveIntervals &LIS,
const MachineRegisterInfo &MRI,
GCNRegPressure::RegKind RegKind) {
- GCNRPTracker::LiveRegSet LiveRegs;
+ GCNRPTracker::LiveRegSet VirtLiveRegs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
auto Reg = Register::index2VirtReg(I);
if (RegKind != GCNRegPressure::TOTAL_KINDS &&
@@ -478,33 +512,49 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
continue;
auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
if (LiveMask.any())
- LiveRegs[Reg] = LiveMask;
+ VirtLiveRegs[Reg] = LiveMask;
}
- return LiveRegs;
+ return VirtLiveRegs;
}
void GCNRPTracker::reset(const MachineInstr &MI,
- const LiveRegSet *LiveRegsCopy,
- bool After) {
+ const LiveRegSet *VirtLiveRegsCopy, bool After) {
const MachineFunction &MF = *MI.getMF();
MRI = &MF.getRegInfo();
- if (LiveRegsCopy) {
- if (&LiveRegs != LiveRegsCopy)
- LiveRegs = *LiveRegsCopy;
+
+ if (VirtLiveRegsCopy) {
+ if (&VirtLiveRegs != VirtLiveRegsCopy)
+ VirtLiveRegs = *VirtLiveRegsCopy;
} else {
- LiveRegs = After ? getLiveRegsAfter(MI, LIS)
- : getLiveRegsBefore(MI, LIS);
+ VirtLiveRegs =
+ After ? getLiveRegsAfter(MI, LIS) : getLiveRegsBefore(MI, LIS);
}
- MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
+ MaxVirtPressure = CurVirtPressure = getRegPressure(*MRI, VirtLiveRegs);
+
+ // Clear physical register tracking (only if enabled)
+ if (TrackPhysRegs) {
+ PhysLiveRegs.clear();
+ PhysLiveRegs.init(*MRI);
+ MaxPhysPressure.clear();
+ CurPhysPressure.clear();
+ }
}
void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
const LiveRegSet &LiveRegs_) {
MRI = &MRI_;
- LiveRegs = LiveRegs_;
+ VirtLiveRegs = LiveRegs_;
LastTrackedMI = nullptr;
- MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
+ MaxVirtPressure = CurVirtPressure = getRegPressure(MRI_, LiveRegs_);
+
+ // Clear physical register tracking (only if enabled)
+ if (TrackPhysRegs) {
+ PhysLiveRegs.clear();
+ PhysLiveRegs.init(*MRI);
+ MaxPhysPressure.clear();
+ CurPhysPressure.clear();
+ }
}
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
@@ -545,46 +595,103 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
} else
DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
- auto I = LiveRegs.find(Reg);
- if (I == LiveRegs.end())
+ auto I = VirtLiveRegs.find(Reg);
+ if (I == VirtLiveRegs.end())
continue;
LaneBitmask &LiveMask = I->second;
LaneBitmask PrevMask = LiveMask;
LiveMask &= ~DefMask;
- CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ CurVirtPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
- LiveRegs.erase(I);
+ VirtLiveRegs.erase(I);
}
- // Update MaxPressure with defs pressure.
- DefPressure += CurPressure;
+ // Update MaxVirtPressure with defs pressure.
+ DefPressure += CurVirtPressure;
if (HasECDefs)
DefPressure += ECDefPressure;
- MaxPressure = max(DefPressure, MaxPressure);
+ MaxVirtPressure = max(DefPressure, MaxVirtPressure);
// Make uses alive.
SmallVector<VRegMaskOrUnit, 8> RegUses;
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
for (const VRegMaskOrUnit &U : RegUses) {
- LaneBitmask &LiveMask = LiveRegs[U.VRegOrUnit.asVirtualReg()];
+ LaneBitmask &LiveMask = VirtLiveRegs[U.VRegOrUnit.asVirtualReg()];
LaneBitmask PrevMask = LiveMask;
LiveMask |= U.LaneMask;
- CurPressure.inc(U.VRegOrUnit.asVirtualReg(), PrevMask, LiveMask, *MRI);
+ CurVirtPressure.inc(U.VRegOrUnit.asVirtualReg(), PrevMask, LiveMask, *MRI);
}
- // Update MaxPressure with uses plus early-clobber defs pressure.
- MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
- : max(CurPressure, MaxPressure);
+ // Update MaxVirtPressure with uses plus early-clobber defs pressure.
+ MaxVirtPressure = HasECDefs
+ ? max(CurVirtPressure + ECDefPressure, MaxVirtPressure)
+ : max(CurVirtPressure, MaxVirtPressure);
+
+ // Track physical register defs and uses (only if enabled).
+ if (TrackPhysRegs) {
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
+
+ // Kill physical register defs (moving backward in upward tracking).
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
+ continue;
+
+ // Check if any unit of this register was live before.
+ bool WasLive = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.any()) {
+ WasLive = true;
+ PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ // Update pressure once per register if it was live.
+ if (WasLive)
+ CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+
+ // Make physical register uses alive (moving backward in upward tracking).
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
+ continue;
+ // Check if any unit of this register was not live before.
+ bool WasNotLive = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.none()) {
+ WasNotLive = true;
+ PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ // Update pressure once per register if it wasn't live before.
+ if (WasNotLive) {
+ CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
+ }
- assert(CurPressure == getRegPressure(*MRI, LiveRegs));
+ assert(CurVirtPressure == getRegPressure(*MRI, VirtLiveRegs));
}
////////////////////////////////////////////////////////////////////////////////
// GCNDownwardRPTracker
bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
- const LiveRegSet *LiveRegsCopy) {
+ const LiveRegSet *VirtLiveRegsCopy) {
MRI = &MI.getMF()->getRegInfo();
LastTrackedMI = nullptr;
MBBEnd = MI.getParent()->end();
@@ -592,7 +699,7 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
if (NextMI == MBBEnd)
return false;
- GCNRPTracker::reset(*NextMI, LiveRegsCopy, false);
+ GCNRPTracker::reset(*NextMI, VirtLiveRegsCopy, false);
return true;
}
@@ -631,31 +738,71 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
continue;
const LiveInterval &LI = LIS.getInterval(MO.getReg());
if (LI.hasSubRanges()) {
- auto It = LiveRegs.end();
+ auto It = VirtLiveRegs.end();
for (const auto &S : LI.subranges()) {
if (!S.liveAt(SI)) {
- if (It == LiveRegs.end()) {
- It = LiveRegs.find(MO.getReg());
- if (It == LiveRegs.end())
+ if (It == VirtLiveRegs.end()) {
+ It = VirtLiveRegs.find(MO.getReg());
+ if (It == VirtLiveRegs.end())
llvm_unreachable("register isn't live");
}
auto PrevMask = It->second;
It->second &= ~S.LaneMask;
- CurPressure.inc(MO.getReg(), PrevMask, It->second, *MRI);
+ CurVirtPressure.inc(MO.getReg(), PrevMask, It->second, *MRI);
}
}
- if (It != LiveRegs.end() && It->second.none())
- LiveRegs.erase(It);
+ if (It != VirtLiveRegs.end() && It->second.none())
+ VirtLiveRegs.erase(It);
} else if (!LI.liveAt(SI)) {
- auto It = LiveRegs.find(MO.getReg());
- if (It == LiveRegs.end())
+ auto It = VirtLiveRegs.find(MO.getReg());
+ if (It == VirtLiveRegs.end())
llvm_unreachable("register isn't live");
- CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI);
- LiveRegs.erase(It);
+ CurVirtPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(),
+ *MRI);
+ VirtLiveRegs.erase(It);
+ }
+ }
+
+ // Track physical register deaths (only if enabled).
+ if (TrackPhysRegs) {
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
+
+ // Iterate over actual instruction operands to track which registers die.
+ SmallSet<Register, 8> SeenRegs;
+ for (const auto &MO : CurrMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg) ||
+ !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is dying.
+ bool WasLive = false;
+ bool IsDying = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.any()) {
+ WasLive = true;
+ // Use LiveIntervals to check if unit dies at SI.
+ if (!isUnitLiveAt(Unit, SI)) {
+ IsDying = true;
+ PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ }
+
+ // Update pressure once per register if it was live and is now dying.
+ if (WasLive && IsDying)
+ CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
}
}
- MaxPressure = max(MaxPressure, CurPressure);
+ MaxVirtPressure = max(MaxVirtPressure, CurVirtPressure);
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
LastTrackedMI = nullptr;
@@ -673,18 +820,47 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
const MachineInstr *CurrMI = LastTrackedMI;
- // Add new registers or mask bits.
+ // Add new registers or mask bits (virtual registers).
for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
- auto &LiveMask = LiveRegs[Reg];
+ auto &LiveMask = VirtLiveRegs[Reg];
auto PrevMask = LiveMask;
LiveMask |= getDefRegMask(MO, *MRI);
- CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ CurVirtPressure.inc(Reg, PrevMask, LiveMask, *MRI);
}
- MaxPressure = max(MaxPressure, CurPressure);
+ // Add new physical register defs (only if enabled).
+ if (TrackPhysRegs) {
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
+
+ for (const auto &MO : CurrMI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
+ continue;
+
+ // Check if any unit of this register was not live before.
+ bool WasNotLive = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.none())
+ WasNotLive = true;
+ // Mark unit as live
+ PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+
+ // Update pressure once per register if it wasn't live before.
+ if (WasNotLive)
+ CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
+ MaxVirtPressure = max(MaxVirtPressure, CurVirtPressure);
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
}
bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator) {
@@ -708,8 +884,8 @@ bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator End) {
bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin,
MachineBasicBlock::const_iterator End,
- const LiveRegSet *LiveRegsCopy) {
- reset(*Begin, LiveRegsCopy);
+ const LiveRegSet *VirtLiveRegsCopy) {
+ reset(*Begin, VirtLiveRegsCopy);
return advance(End);
}
@@ -750,8 +926,10 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
RegisterOperands RegOpers;
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
- GCNRegPressure TempPressure = CurPressure;
+ GCNRegPressure TempVirtPressure = CurVirtPressure;
+ GCNRegPressure TempPhysPressure = CurPhysPressure;
+ // Process virtual register uses
for (const VRegMaskOrUnit &Use : RegOpers.Uses) {
if (!Use.VRegOrUnit.isVirtualReg())
continue;
@@ -779,30 +957,92 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
if (LastUseMask.none())
continue;
- auto It = LiveRegs.find(Reg);
- LaneBitmask LiveMask = It != LiveRegs.end() ? It->second : LaneBitmask(0);
+ auto It = VirtLiveRegs.find(Reg);
+ LaneBitmask LiveMask =
+ It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
LaneBitmask NewMask = LiveMask & ~LastUseMask;
- TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ TempVirtPressure.inc(Reg, LiveMask, NewMask, *MRI);
}
- // Generate liveness for defs.
+ // Generate liveness for virtual register defs.
for (const VRegMaskOrUnit &Def : RegOpers.Defs) {
if (!Def.VRegOrUnit.isVirtualReg())
continue;
Register Reg = Def.VRegOrUnit.asVirtualReg();
- auto It = LiveRegs.find(Reg);
- LaneBitmask LiveMask = It != LiveRegs.end() ? It->second : LaneBitmask(0);
+ auto It = VirtLiveRegs.find(Reg);
+ LaneBitmask LiveMask =
+ It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
LaneBitmask NewMask = LiveMask | Def.LaneMask;
- TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ TempVirtPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ }
+
+ // Process physical registers (only if enabled).
+ if (TrackPhysRegs) {
+ const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
+ SmallSet<Register, 8> SeenRegs;
+
+ // Process physical register defs.
+ for (const auto &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg) ||
+ !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is not currently live.
+ bool WasNotLive = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (PhysLiveRegs.contains(VirtRegOrUnit(static_cast<MCRegUnit>(Unit)))
+ .none()) {
+ WasNotLive = true;
+ break;
+ }
+ }
+
+ if (WasNotLive && !MO.isDead()) {
+ TempPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
+ // Process physical register uses to find kills.
+ SeenRegs.clear();
+ for (const auto &MO : MI->uses()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg) ||
+ !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is dying.
+ bool IsDying = false;
+ bool IsLive = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
+ if (PhysLiveRegs.contains(VRU).any()) {
+ IsLive = true;
+ if (!isUnitLiveAt(Unit, SlotIdx)) {
+ IsDying = true;
+ break;
+ }
+ }
+ }
+
+ if (IsLive && IsDying) {
+ TempPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+ }
}
- return TempPressure;
+ // Return sum of virtual and physical pressure
+ return TempVirtPressure + TempPhysPressure;
}
bool GCNUpwardRPTracker::isValid() const {
const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
- const auto &TrackedLR = LiveRegs;
+ const auto &TrackedLR = VirtLiveRegs;
if (!isEqual(LISLR, TrackedLR)) {
dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
@@ -813,22 +1053,22 @@ bool GCNUpwardRPTracker::isValid() const {
}
auto LISPressure = getRegPressure(*MRI, LISLR);
- if (LISPressure != CurPressure) {
+ if (LISPressure != CurVirtPressure) {
dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: "
- << print(CurPressure) << "LIS rpt: " << print(LISPressure);
+ << print(CurVirtPressure) << "LIS rpt: " << print(LISPressure);
return false;
}
return true;
}
-Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs,
+Printable llvm::print(const GCNRPTracker::LiveRegSet &VirtLiveRegs,
const MachineRegisterInfo &MRI) {
- return Printable([&LiveRegs, &MRI](raw_ostream &OS) {
+ return Printable([&VirtLiveRegs, &MRI](raw_ostream &OS) {
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
Register Reg = Register::index2VirtReg(I);
- auto It = LiveRegs.find(Reg);
- if (It != LiveRegs.end() && It->second.any())
+ auto It = VirtLiveRegs.find(Reg);
+ if (It != VirtLiveRegs.end() && It->second.any())
OS << ' ' << printReg(Reg, TRI) << ':' << PrintLaneMask(It->second);
}
OS << '\n';
@@ -925,6 +1165,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RPAtMBBEnd = getRegPressure(MRI, LiveIn);
} else {
GCNDownwardRPTracker RPT(LIS);
+ RPT.initPhysLiveRegs(MRI);
RPT.reset(MBB.front());
LiveIn = RPT.getLiveRegs();
@@ -940,6 +1181,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
}
} else {
GCNUpwardRPTracker RPT(LIS);
+ RPT.initPhysLiveRegs(MRI);
RPT.reset(MRI, MBBLastSlot);
LiveOut = RPT.getLiveRegs();
@@ -1008,8 +1250,9 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
const char *RegName = GCNRegPressure::getName(Kind);
unsigned MaxNumRegs = 0;
- const MachineInstr *MaxPressureMI = nullptr;
+ const MachineInstr *MaxVirtPressureMI = nullptr;
GCNUpwardRPTracker RPT(LIS);
+ RPT.initPhysLiveRegs(MRI);
for (const MachineBasicBlock &MBB : MF) {
RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
for (const MachineInstr &MI : reverse(MBB)) {
@@ -1017,12 +1260,12 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
if (NumRegs > MaxNumRegs) {
MaxNumRegs = NumRegs;
- MaxPressureMI = &MI;
+ MaxVirtPressureMI = &MI;
}
}
}
- SlotIndex MISlot = LIS.getInstructionIndex(*MaxPressureMI);
+ SlotIndex MISlot = LIS.getInstructionIndex(*MaxVirtPressureMI);
// Max pressure can occur at either the early-clobber or register slot.
// Choose the maximum liveset between both slots. This is ugly but this is
@@ -1035,7 +1278,7 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
unsigned RNumRegs = getRegPressure(MRI, RLiveSet).getNumRegs(Kind);
GCNRPTracker::LiveRegSet *LiveSet =
ECNumRegs > RNumRegs ? &ECLiveSet : &RLiveSet;
- SlotIndex MaxPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
+ SlotIndex MaxVirtPressureSlot = ECNumRegs > RNumRegs ? ECSlot : RSlot;
assert(getRegPressure(MRI, *LiveSet).getNumRegs(Kind) == MaxNumRegs);
// Split live registers into single-def and multi-def sets.
@@ -1097,8 +1340,8 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
OS << "\n*** Register pressure info (" << RegName << "s) for " << MF.getName()
<< " ***\n";
OS << "Max pressure is " << MaxNumRegs << ' ' << RegName << "s at "
- << printLoc(MaxPressureMI->getParent(), MaxPressureSlot) << ": "
- << *MaxPressureMI;
+ << printLoc(MaxVirtPressureMI->getParent(), MaxVirtPressureSlot) << ": "
+ << *MaxVirtPressureMI;
OS << "\nLive registers with single definition (" << SDefNumRegs << ' '
<< RegName << "s):\n";
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 15853a35d230e..6eabcececac95 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -285,13 +285,41 @@ class GCNRPTracker {
protected:
const LiveIntervals &LIS;
- LiveRegSet LiveRegs;
- GCNRegPressure CurPressure, MaxPressure;
+
+ // Virtual register tracking
+ LiveRegSet VirtLiveRegs;
+ GCNRegPressure CurVirtPressure, MaxVirtPressure;
+
+ // Physical register tracking
+ llvm::LiveRegSet PhysLiveRegs;
+ GCNRegPressure CurPhysPressure, MaxPhysPressure;
+
+ // Flag to control whether physical register tracking is active.
+ // Set to true when GCNTrackers are enabled, false otherwise.
+ bool TrackPhysRegs = false;
+
const MachineInstr *LastTrackedMI = nullptr;
mutable const MachineRegisterInfo *MRI = nullptr;
GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
+ // Copy constructor - PhysLiveRegs must be initialized then copied.
+ GCNRPTracker(const GCNRPTracker &Other)
+ : LIS(Other.LIS), VirtLiveRegs(Other.VirtLiveRegs),
+ CurVirtPressure(Other.CurVirtPressure),
+ MaxVirtPressure(Other.MaxVirtPressure),
+ CurPhysPressure(Other.CurPhysPressure),
+ MaxPhysPressure(Other.MaxPhysPressure),
+ TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
+ MRI(Other.MRI) {
+ // Initialize PhysLiveRegs with proper universe, then copy contents.
+ if (MRI) {
+ PhysLiveRegs.init(*MRI);
+ PhysLiveRegs =
+ Other.PhysLiveRegs; // Use assignment operator to copy live regs.
+ }
+ }
+
void reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy,
bool After);
@@ -300,20 +328,50 @@ class GCNRPTracker {
LaneBitmask getLastUsedLanes(Register Reg, SlotIndex Pos) const;
+ // Helper methods for physical register tracking
+ Register getPhysRegFromUnit(MCRegUnit Unit) const;
+ bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
+
public:
+ // Initialize PhysLiveRegs capacity. Must be called before first use.
+ void initPhysLiveRegs(const MachineRegisterInfo &MRI_) {
+ PhysLiveRegs.init(MRI_);
+ }
+
+ // Enable physical register tracking. Should only be called when GCNTrackers
+ // are enabled to avoid changing behavior when using generic trackers.
+ void enablePhysTracking() { TrackPhysRegs = true; }
+
// reset tracker and set live register set to the specified value.
void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
+
// live regs for the current state
- const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
+ const decltype(VirtLiveRegs) &getLiveRegs() const { return VirtLiveRegs; }
+ const decltype(VirtLiveRegs) &getVirtLiveRegs() const { return VirtLiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
- void clearMaxPressure() { MaxPressure.clear(); }
+ void clearMaxPressure() {
+ MaxVirtPressure.clear();
+ MaxPhysPressure.clear();
+ }
+
+ // Returns sum of virtual and physical register pressure
+ GCNRegPressure getPressure() const {
+ return CurVirtPressure + CurPhysPressure;
+ }
- GCNRegPressure getPressure() const { return CurPressure; }
+ // Returns only virtual register pressure
+ GCNRegPressure getVirtPressure() const { return CurVirtPressure; }
- decltype(LiveRegs) moveLiveRegs() {
- return std::move(LiveRegs);
+ // Returns only physical register pressure
+ GCNRegPressure getPhysPressure() const { return CurPhysPressure; }
+
+ // Returns sum of virtual and physical max pressure
+ GCNRegPressure getMaxPressure() const {
+ return MaxVirtPressure + MaxPhysPressure;
}
+
+ decltype(VirtLiveRegs) moveLiveRegs() { return std::move(VirtLiveRegs); }
};
GCNRPTracker::LiveRegSet
@@ -355,12 +413,13 @@ class GCNUpwardRPTracker : public GCNRPTracker {
/// to reported by LIS.
bool isValid() const;
- const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
-
- void resetMaxPressure() { MaxPressure = CurPressure; }
+ void resetMaxPressure() {
+ MaxVirtPressure = CurVirtPressure;
+ MaxPhysPressure = CurPhysPressure;
+ }
GCNRegPressure getMaxPressureAndReset() {
- GCNRegPressure RP = MaxPressure;
+ GCNRegPressure RP = getMaxPressure();
resetMaxPressure();
return RP;
}
@@ -384,8 +443,9 @@ class GCNDownwardRPTracker : public GCNRPTracker {
/// \p return MaxPressure and clear it.
GCNRegPressure moveMaxPressure() {
- auto Res = MaxPressure;
- MaxPressure.clear();
+ auto Res = getMaxPressure();
+ MaxVirtPressure.clear();
+ MaxPhysPressure.clear();
return Res;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index cabf759762a72..f69be8432b5eb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -101,7 +101,6 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
MF = &DAG->MF;
-
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
SGPRExcessLimit =
@@ -971,6 +970,9 @@ void GCNScheduleDAGMILive::schedule() {
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
GCNDownwardRPTracker RPTracker(*LIS);
+ RPTracker.initPhysLiveRegs(MF.getRegInfo());
+ if (GCNTrackers)
+ RPTracker.enablePhysTracking();
RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
&LiveIns[RegionIdx]);
return RPTracker.moveMaxPressure();
@@ -985,6 +987,9 @@ static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
const MachineBasicBlock *MBB) {
GCNDownwardRPTracker RPTracker(*LIS);
+ RPTracker.initPhysLiveRegs(MF.getRegInfo());
+ if (GCNTrackers)
+ RPTracker.enablePhysTracking();
// If the block has the only successor then live-ins of that successor are
// live-outs of the current block. We can reuse calculated live set if the
@@ -1117,7 +1122,6 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
void GCNScheduleDAGMILive::runSchedStages() {
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
-
if (!Regions.empty()) {
BBLiveInMap = getRegionLiveInMap();
if (GCNTrackers)
@@ -1133,6 +1137,13 @@ void GCNScheduleDAGMILive::runSchedStages() {
#endif
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
+ // Initialize physical register tracking in GCN trackers.
+ S.getDownwardTracker()->initPhysLiveRegs(MF.getRegInfo());
+ S.getUpwardTracker()->initPhysLiveRegs(MF.getRegInfo());
+ if (GCNTrackers) {
+ S.getDownwardTracker()->enablePhysTracking();
+ S.getUpwardTracker()->enablePhysTracking();
+ }
while (S.advanceStage()) {
auto Stage = createSchedStage(S.getCurrentStage());
if (!Stage->initGCNSchedStage())
@@ -2140,6 +2151,9 @@ void PreRARematStage::rematerialize() {
RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
} else {
GCNDownwardRPTracker RPT(*DAG.LIS);
+ RPT.initPhysLiveRegs(DAG.MRI);
+ if (GCNTrackers)
+ RPT.enablePhysTracking();
auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
DAG.Regions[I].second);
if (NonDbgMI == DAG.Regions[I].second) {
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 6b13b06590102..b3b8c7e8b4350 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -278,6 +278,7 @@ bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
GCNDownwardRPTracker RPT(*LIS);
+ RPT.initPhysLiveRegs(*MRI);
MachineBasicBlock::instr_iterator Next;
for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
MachineInstr &MI = *I;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 409509120c32d..e4890f952a50f 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3781,6 +3781,126 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
return RC && isAGPRClass(RC);
}
+// Helper used by GCN trackers. Returns true if Reg is one of the special
+// physical registers below, which are excluded from pressure tracking.
+bool SIRegisterInfo::isReservedSpecialRegister(Register Reg) {
+ if (!Reg.isPhysical())
+ return false;
+
+ // Check all special-purpose registers that are reserved in getReservedRegs().
+ // These registers don't contribute to general register pressure and
+ // correspond to register classes with GeneratePressureSet = 0 in .td files.
+ switch (Reg.id()) {
+ // MODE - floating point mode register
+ case AMDGPU::MODE:
+
+ // EXEC - execution mask (and sub-registers)
+ case AMDGPU::EXEC:
+ case AMDGPU::EXEC_LO:
+ case AMDGPU::EXEC_HI:
+
+ // FLAT_SCR - flat scratch address (and sub-registers)
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::FLAT_SCR_LO:
+ case AMDGPU::FLAT_SCR_HI:
+
+ // M0 - memory operation descriptor
+ case AMDGPU::M0:
+
+ // VCC - condition code register (and sub-registers)
+ case AMDGPU::VCC:
+ case AMDGPU::VCC_LO:
+ case AMDGPU::VCC_HI:
+
+ // SCC - scalar condition code
+ case AMDGPU::SCC:
+
+ // Special source operands
+ case AMDGPU::SRC_VCCZ:
+ case AMDGPU::SRC_EXECZ:
+ case AMDGPU::SRC_SCC:
+
+ // Memory aperture registers
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
+ case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
+ case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
+
+ // Async counter pseudo registers
+ case AMDGPU::ASYNCcnt:
+ case AMDGPU::TENSORcnt:
+
+ // Other special registers
+ case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
+ case AMDGPU::LDS_DIRECT:
+
+ // XNACK_MASK (and sub-registers) - page fault handling
+ case AMDGPU::XNACK_MASK:
+ case AMDGPU::XNACK_MASK_LO:
+ case AMDGPU::XNACK_MASK_HI:
+
+ // Trap handler registers (TBA/TMA and their sub-registers)
+ case AMDGPU::TBA:
+ case AMDGPU::TBA_LO:
+ case AMDGPU::TBA_HI:
+ case AMDGPU::TMA:
+ case AMDGPU::TMA_LO:
+ case AMDGPU::TMA_HI:
+
+ // Trap handler temporary registers (register pairs only; see note below)
+ case AMDGPU::TTMP0_TTMP1:
+ case AMDGPU::TTMP2_TTMP3:
+ case AMDGPU::TTMP4_TTMP5:
+ case AMDGPU::TTMP6_TTMP7:
+ case AMDGPU::TTMP8_TTMP9:
+ case AMDGPU::TTMP10_TTMP11:
+ case AMDGPU::TTMP12_TTMP13:
+ case AMDGPU::TTMP14_TTMP15:
+
+ // Null register
+ case AMDGPU::SGPR_NULL64:
+ return true;
+
+ default:
+ return false;
+ }
+
+ // Note: Individual TTMP registers (TTMP0-TTMP15, etc.) and other
+ // sub-registers are reserved via reserveRegisterTuples() in
+ // getReservedRegs(), which marks all aliases as non-allocatable. They don't
+ // need explicit checks here since shouldTrackRegisterForPressure() filters
+ // non-allocatable registers.
+}
+
+// Helper to check if a physical register should be tracked for pressure by GCN
+// trackers. Returns true only for allocatable physical SGPRs/VGPRs/AGPRs,
+// and false otherwise (non-physical, or special such as VCC, EXEC, M0).
+//
+// The generic RegPressureTracker avoids counting special registers through
+// pressure sets: register classes with "GeneratePressureSet = 0" in the .td
+// files don't contribute to pressure. Since GCNRPTracker counts registers
+// directly, we need to explicitly filter special registers to match the generic
+// tracker's behavior.
+//
+// The registers rejected by isReservedSpecialRegister() are those that:
+// 1. Are in register classes with GeneratePressureSet = 0 (see
+// SIRegisterInfo.td)
+// 2. Are reserved in getReservedRegs() (see SIRegisterInfo.cpp)
+// 3. Shouldn't count toward actual SGPR/VGPR/AGPR pressure
+bool SIRegisterInfo::shouldTrackRegisterForPressure(
+ const MachineRegisterInfo &MRI, Register Reg) const {
+ // Only track physical, allocatable registers
+ if (!Reg.isPhysical() || !MRI.isAllocatable(Reg))
+ return false;
+
+ if (isReservedSpecialRegister(Reg))
+ return false;
+
+ return isSGPRReg(MRI, Reg) || isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
+}
+
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 2e2916f68f584..a3cadef3e6bc1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -332,6 +332,17 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
}
+ /// Returns true if \p Reg is a physical special-purpose/reserved register
+ /// (e.g., VCC, EXEC, M0, trap handler registers) that does not contribute to
+ /// general register pressure. Returns false for non-physical registers.
+ static bool isReservedSpecialRegister(Register Reg);
+
+ /// Returns true if \p Reg should contribute to register pressure tracking
+ /// for scheduling: it must be a physical, allocatable SGPR/VGPR/AGPR and not
+ /// a special-purpose register (e.g., VCC, EXEC, M0).
+ bool shouldTrackRegisterForPressure(const MachineRegisterInfo &MRI,
+ Register Reg) const;
+
// FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
// (such as VCC) which hold a wave-wide vector of boolean values. Examining
// just the register class is not suffcient; it needs to be combined with a
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 8d24f6ba66968..8e6b46a4b6bbb 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -10957,73 +10957,73 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
; GFX908-GCNTRACKERS-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]]
; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -11301,18 +11301,18 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-GCNTRACKERS-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 481eb1bc3d91a..d076d8ab55d50 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -7,6 +7,14 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX7-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX8-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefixes=GFX900-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefixes=GFX942-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX10_1-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX10_3-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX11-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX12-GCNTRACKERS %s
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
<16 x i32>, <7 x i32>, ; vgprs
@@ -563,6 +571,540 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX7-GCNTRACKERS-NEXT: v_add_i32_e32 v0, vcc, 64, v0
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX7-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, 0x4040
+; GFX7-GCNTRACKERS-NEXT: v_mad_u32_u24 v0, v0, 64, s32
+; GFX7-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v0, 6, v0
+; GFX7-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX8-GCNTRACKERS-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX8-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, 0x4040
+; GFX8-GCNTRACKERS-NEXT: v_mad_u32_u24 v0, v0, 64, s32
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v0, 6, v0
+; GFX8-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 0x4040, v0
+; GFX900-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
+; GFX10_1-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v24
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
+; GFX10_3-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v24
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX12-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: s_add_co_ci_u32 s59, s32, 0x4000
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX12-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
@@ -1084,6 +1626,485 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_mad_u32_u24 v22, 16, 64, s32
+; GFX7-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v22, 6, v22
+; GFX7-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_mad_u32_u24 v22, 16, 64, s32
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v22, 6, v22
+; GFX8-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 6, s32
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v22, 16, v22
+; GFX900-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4010
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 16
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4010
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v22, 16, v22
+; GFX10_1-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v22, 16, v22
+; GFX10_3-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4010
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 16
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4010
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s32
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
; Force no SGPRs to be available for the carry-out of the vector add.
@@ -1660,6 +2681,543 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s28, 17
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s29, 18
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX7-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX7-GCNTRACKERS-NEXT: v_add_i32_e32 v0, vcc, 64, v0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s28, v23, 17
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s29, v23, 18
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX8-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX8-GCNTRACKERS-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX900-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s58, s4, 0x4240
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s58, s4, 0x4240
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX11-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX12-GCNTRACKERS-NEXT: s_add_co_i32 s58, s32, 0x4200
+; GFX12-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
index f70cd6816a966..4771c8c419468 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
@@ -1,5 +1,5 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 2>&1 < %s | FileCheck -check-prefixes=ERR-GCNTRACKERS %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack 2>&1 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 2>&1 < %s | FileCheck -check-prefixes=GCN-TRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack 2>&1 < %s | FileCheck -check-prefixes=GCN %s
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
<16 x i32>, <7 x i32>, ; vgprs
@@ -16,10 +16,12 @@
i64 ; vcc
}
-; ERR-GCNTRACKERS: ran out of registers during register allocation
+; GCN-TRACKERS-NOT: ran out of registers during register allocation
; GCN-NOT: ran out of registers during register allocation
-; FIXME: GCN Trackers do not track pressure from PhysRegs, so scheduling is actually worse
+; GCN Trackers now track physical register pressure correctly, so this test
+; verifies that both trackers can successfully handle code with heavy physical
+; register usage from inline assembly.
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 {
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 0d25bc97ff775..0d81a11243ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -1,17 +1,31 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN-GCNTRACKERS %s
-
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED-GCNTRACKERS %s
+; REQUIRES: asserts
; CHECK-LABEL: {{^}}spill:
; GCN: NumSgprs: 104
; GCN-GCNTRACKERS: NumSgprs: 104
; GCN: NumVgprs: 1
-; GCN-GCNTRACKERS: NumVgprs: 2
+; GCN-GCNTRACKERS: NumVgprs: 1
; GCN: ScratchSize: 0
; GCN-GCNTRACKERS: ScratchSize: 0
; GCN: Occupancy: 5
; GCN-GCNTRACKERS: Occupancy: 5
-
-; FIXME: GCN Trackers do not track pressure from PhysRegs, so scheduling is actually worse
+;
+; Check scheduling pressure values:
+; SCHED-LABEL: spill:%bb.0 entry
+; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
+; SCHED: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 97
+;
+; SCHED-GCNTRACKERS-LABEL: spill:%bb.0 entry
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 99
+; SCHED-GCNTRACKERS: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 98
+;
+; NOTE: GCN Trackers now track pressure from both virtual and physical registers.
+; The GCN tracker now matches the generic tracker's VGPR count (1 VGPR).
+; SGPR pressure is still one higher (region: 99 vs 98; after scheduling: 98 vs 97)
+; because physical register pressure from inline asm is summed with virtual register pressure.
define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 {
entry:
@@ -247,9 +261,15 @@ bb3:
; GCN: NumSgprs: 104
; GCN-GCNTRACKERS: NumSgprs: 104
; GCN: NumVgprs: 2
-; GCN-GCNTRACKERS: NumVgprs: 3
+; GCN-GCNTRACKERS: NumVgprs: 2
; GCN: ScratchSize: 8
-; GCN-GCNTRACKERS: ScratchSize: 12
+; GCN-GCNTRACKERS: ScratchSize: 8
+;
+; SCHED-LABEL: spill_func:%bb.0 entry
+; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 97
+;
+; SCHED-GCNTRACKERS-LABEL: spill_func:%bb.0 entry
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
define void @spill_func(ptr addrspace(1) %arg) #0 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
new file mode 100644
index 0000000000000..ce076faa91fd5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -0,0 +1,364 @@
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
+; REQUIRES: asserts
+
+; Test that GCN trackers correctly track physical register pressure from inline asm
+
+; GCN-DEBUG-LABEL: test_single_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_single_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
+ store i32 0, ptr addrspace(1) %out
+ ret void
+}
+
+; Test multiple physical registers
+
+; GCN-DEBUG-LABEL: test_multiple_physregs
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_multiple_physregs
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
+ store i32 0, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register with virtual registers
+
+; GCN-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_physreg_with_vreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+entry:
+ %asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
+ %val = load i32, ptr addrspace(1) %in
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+; Test that pressure is not inflated for an empty kernel with no physical register uses (checked under both trackers)
+
+; GCN-DEBUG-LABEL: test_no_inflation
+
+; GENERIC-DEBUG-LABEL: test_no_inflation
+
+define amdgpu_kernel void @test_no_inflation() {
+entry:
+ ret void
+}
+
+; Test early-clobber constraint
+
+; GCN-DEBUG-LABEL: test_early_clobber
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_early_clobber
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"()
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+; Test a physical register (s10) referenced only inside the asm string, with s11 as the sole declared output
+
+; GCN-DEBUG-LABEL: test_physreg_input
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_physreg_input
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"()
+ store i32 0, ptr addrspace(1) %out
+ ret void
+}
+
+; Test virtual and physical register overlap
+
+; GCN-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+
+; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+
+define amdgpu_kernel void @test_vreg_and_physreg_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
+ %val1 = load i32, ptr addrspace(1) %in1
+ %val2 = load i32, ptr addrspace(1) %in2
+ %sum = add i32 %val1, %val2
+ store i32 %sum, ptr addrspace(1) %out
+ ret void
+}
+
+; Verify assembly output for GCN trackers
+; GCN-LABEL: test_single_physreg:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_single_physreg.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_multiple_physregs:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_multiple_physregs.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_physreg_with_vreg:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_no_inflation:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_endpgm
+; GCN: .set test_no_inflation.numbered_sgpr, 0
+; GCN: TotalNumSgprs: 0
+; GCN: NumVgprs: 0
+
+; GCN-LABEL: test_early_clobber:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_early_clobber.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_physreg_input:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_physreg_input.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s6
+; GCN-NEXT: s_mov_b32 s3, s7
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NEXT: s_mov_b32 s10, s6
+; GCN-NEXT: s_mov_b32 s11, s7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 14
+; GCN: NumVgprs: 2
+
+; Verify assembly output without GCN trackers (should be identical)
+; NO-GCN-LABEL: test_single_physreg:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_single_physreg.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_multiple_physregs:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_multiple_physregs.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_physreg_with_vreg:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_mov_b32 s4, s2
+; NO-GCN-NEXT: s_mov_b32 s5, s3
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_no_inflation:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_no_inflation.numbered_sgpr, 0
+; NO-GCN: TotalNumSgprs: 0
+; NO-GCN: NumVgprs: 0
+
+; NO-GCN-LABEL: test_early_clobber:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_early_clobber.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_physreg_input:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_physreg_input.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_vreg_and_physreg_overlap:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: s_mov_b32 s0, s2
+; NO-GCN-NEXT: s_mov_b32 s1, s3
+; NO-GCN-NEXT: s_mov_b32 s2, s6
+; NO-GCN-NEXT: s_mov_b32 s3, s7
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; NO-GCN-NEXT: s_mov_b32 s10, s6
+; NO-GCN-NEXT: s_mov_b32 s11, s7
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 14
+; NO-GCN: NumVgprs: 2
>From 1aaae10e2c5049608465f536c0b9f9eabd3918e2 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Sat, 31 Jan 2026 15:14:18 -0600
Subject: [PATCH 2/6] [AMDGPU] Refactored code based on reviewer comments.
Moved regunit iteration code to GCNRPTracker methods.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 150 +++++++++++++---------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 17 +++
2 files changed, 107 insertions(+), 60 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 9f2bc73aca4ad..d011b7a800a5d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,6 +14,7 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
@@ -481,6 +482,80 @@ bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
return LR->liveAt(SI);
}
+// Return true only if every register unit of Reg is live in PhysLiveRegs.
+bool GCNRPTracker::allRegUnitsLive(Register Reg) const {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ return llvm::all_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any();
+ });
+}
+
+// Return true if any unit of Reg that is currently tracked in PhysLiveRegs is
+// not live at slot index SI (per isUnitLiveAt). PhysLiveRegs is not modified.
+bool GCNRPTracker::checkRegKilled(Register Reg, SlotIndex SI) const {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ return llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any() &&
+ !isUnitLiveAt(Unit, SI);
+ });
+}
+
+// Erase the units of Reg that are killed at SI; return true if any was erased.
+bool GCNRPTracker::eraseKilledUnits(Register Reg, SlotIndex SI) {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ bool IsKilled = false;
+
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.any()) {
+ if (!isUnitLiveAt(Unit, SI)) {
+ IsKilled = true;
+ PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ }
+ return IsKilled;
+}
+
+// Erase all live units of Reg from PhysLiveRegs; return true if any was live.
+bool GCNRPTracker::eraseAllLiveUnits(Register Reg) {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ bool WasLive = false;
+
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.any()) {
+ WasLive = true;
+ PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+
+ return WasLive;
+}
+
+// Insert every not-yet-live unit of Reg into PhysLiveRegs; true if any added.
+bool GCNRPTracker::insertAllNotLiveUnits(Register Reg) {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ bool WasNotLive = false;
+
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.none()) {
+ WasNotLive = true;
+ PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ return WasNotLive;
+}
+
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter) {
@@ -641,16 +716,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
continue;
- // Check if any unit of this register was live before.
- bool WasLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.any()) {
- WasLive = true;
- PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ // Check if any unit of this register was live before and erase them.
+ bool WasLive = eraseAllLiveUnits(Reg);
+
// Update pressure once per register if it was live.
if (WasLive)
CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
@@ -664,16 +732,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
Register Reg = MO.getReg();
if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
continue;
- // Check if any unit of this register was not live before.
- bool WasNotLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.none()) {
- WasNotLive = true;
- PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ // Check if any unit of this register was not live before and insert them.
+ bool WasNotLive = insertAllNotLiveUnits(Reg);
+
// Update pressure once per register if it wasn't live before.
if (WasNotLive) {
CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
@@ -763,7 +824,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
}
}
- // Track physical register deaths (only if enabled).
+ // Track physical register kills (only if enabled).
if (TrackPhysRegs) {
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
@@ -778,24 +839,11 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
!SeenRegs.insert(Reg).second)
continue;
- // Check if any unit of this register is dying.
- bool WasLive = false;
- bool IsDying = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.any()) {
- WasLive = true;
- // Use LiveIntervals to check if unit dies at SI.
- if (!isUnitLiveAt(Unit, SI)) {
- IsDying = true;
- PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
- }
+ // Check if any unit of this register is killed and erase killed units.
+ bool IsKilled = eraseKilledUnits(Reg, SI);
- // Update pressure once per register if it was live and is now dying.
- if (WasLive && IsDying)
+ // Update pressure once per register if it was live and is now killed.
+ if (IsKilled)
CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
*MRI);
}
@@ -989,14 +1037,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
continue;
// Check if any unit of this register is not currently live.
- bool WasNotLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- if (PhysLiveRegs.contains(VirtRegOrUnit(static_cast<MCRegUnit>(Unit)))
- .none()) {
- WasNotLive = true;
- break;
- }
- }
+ bool WasNotLive = !allRegUnitsLive(Reg);
if (WasNotLive && !MO.isDead()) {
TempPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
@@ -1014,21 +1055,10 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
!SeenRegs.insert(Reg).second)
continue;
- // Check if any unit of this register is dying.
- bool IsDying = false;
- bool IsLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
- if (PhysLiveRegs.contains(VRU).any()) {
- IsLive = true;
- if (!isUnitLiveAt(Unit, SlotIdx)) {
- IsDying = true;
- break;
- }
- }
- }
+ // Check if any unit of this register is killed.
+ bool IsKilled = checkRegKilled(Reg, SlotIdx);
- if (IsLive && IsDying) {
+ if (IsKilled) {
TempPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
*MRI);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 6eabcececac95..252230e82f704 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -332,6 +332,23 @@ class GCNRPTracker {
Register getPhysRegFromUnit(MCRegUnit Unit) const;
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
+ // Check if all register units of Reg are currently live in PhysLiveRegs.
+ bool allRegUnitsLive(Register Reg) const;
+
+ // Check if Reg has any killed units at the given slot index.
+ bool checkRegKilled(Register Reg, SlotIndex SI) const;
+
+ // Check if Reg has any killed units and erase them from PhysLiveRegs.
+ bool eraseKilledUnits(Register Reg, SlotIndex SI);
+
+ // Erase all live units of Reg from PhysLiveRegs.
+ // Returns true if any unit was live (and thus erased).
+ bool eraseAllLiveUnits(Register Reg);
+
+ // Insert all not-live units of Reg into PhysLiveRegs.
+ // Returns true if any unit was not live (and thus inserted).
+ bool insertAllNotLiveUnits(Register Reg);
+
public:
// Initialize PhysLiveRegs capacity. Must be called before first use.
void initPhysLiveRegs(const MachineRegisterInfo &MRI_) {
>From a2ae7509e310b35d77ab6e281443fc86dd8681ee Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 6 Feb 2026 02:09:42 -0600
Subject: [PATCH 3/6] [AMDGPU] Use isAllocatable instead of special handling of
reserved registers.
Removed shouldTrackRegisterForPressure and dependencies.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 47 ++-----
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 5 +-
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 120 ------------------
llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 11 --
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 4 +-
5 files changed, 13 insertions(+), 174 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index d011b7a800a5d..1db79a2329674 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -460,21 +460,7 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
}
-// Helper to get the physical register that owns a register unit.
-Register GCNRPTracker::getPhysRegFromUnit(MCRegUnit Unit) const {
- assert(MRI && "MRI not initialized");
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
-
- // Return the first allocatable register that contains this unit.
- for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
- Register Reg(*RI);
- if (MRI->isAllocatable(Reg))
- return Reg;
- }
- return Register();
-}
-
-// Helper toheck if a register unit is live at a given slot index.
+// Helper to check if a register unit is live at a given slot index.
bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
const LiveRange *LR = LIS.getCachedRegUnit(Unit);
if (!LR)
@@ -507,7 +493,6 @@ bool GCNRPTracker::eraseKilledUnits(Register Reg, SlotIndex SI) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
bool IsKilled = false;
-
for (MCRegUnit Unit : TRI->regunits(Reg)) {
VirtRegOrUnit VRU(Unit);
LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
@@ -526,7 +511,6 @@ bool GCNRPTracker::eraseAllLiveUnits(Register Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
bool WasLive = false;
-
for (MCRegUnit Unit : TRI->regunits(Reg)) {
VirtRegOrUnit VRU(Unit);
LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
@@ -535,7 +519,6 @@ bool GCNRPTracker::eraseAllLiveUnits(Register Reg) {
PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
}
}
-
return WasLive;
}
@@ -544,7 +527,6 @@ bool GCNRPTracker::insertAllNotLiveUnits(Register Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
bool WasNotLive = false;
-
for (MCRegUnit Unit : TRI->regunits(Reg)) {
VirtRegOrUnit VRU(Unit);
LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
@@ -705,15 +687,12 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// Track physical register defs and uses (only if enabled).
if (TrackPhysRegs) {
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
-
// Kill physical register defs (moving backward in upward tracking).
for (const MachineOperand &MO : MI.all_defs()) {
if (!MO.getReg().isPhysical())
continue;
Register Reg = MO.getReg();
- if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
+ if (!MRI->isAllocatable(Reg))
continue;
// Check if any unit of this register was live before and erase them.
@@ -730,7 +709,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
continue;
Register Reg = MO.getReg();
- if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
+ if (!MRI->isAllocatable(Reg))
continue;
// Check if any unit of this register was not live before and insert them.
bool WasNotLive = insertAllNotLiveUnits(Reg);
@@ -826,17 +805,13 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
// Track physical register kills (only if enabled).
if (TrackPhysRegs) {
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
-
// Iterate over actual instruction operands to track which registers die.
SmallSet<Register, 8> SeenRegs;
for (const auto &MO : CurrMI->operands()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
Register Reg = MO.getReg();
- if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg) ||
- !SeenRegs.insert(Reg).second)
+ if (!MRI->isAllocatable(Reg) || !SeenRegs.insert(Reg).second)
continue;
// Check if any unit of this register is killed and erase killed units.
@@ -881,18 +856,15 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
// Add new physical register defs (only if enabled).
if (TrackPhysRegs) {
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
-
for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
- if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg))
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg))
continue;
// Check if any unit of this register was not live before.
bool WasNotLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(static_cast<MCRegUnit>(Unit));
+ for (MCRegUnit Unit : MRI->getTargetRegisterInfo()->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
if (PrevMask.none())
WasNotLive = true;
@@ -1026,13 +998,12 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
// Process physical registers (only if enabled).
if (TrackPhysRegs) {
- const SIRegisterInfo *STRI = static_cast<const SIRegisterInfo *>(TRI);
SmallSet<Register, 8> SeenRegs;
// Process physical register defs.
for (const auto &MO : MI->all_defs()) {
Register Reg = MO.getReg();
- if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg) ||
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
!SeenRegs.insert(Reg).second)
continue;
@@ -1051,7 +1022,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
Register Reg = MO.getReg();
- if (!STRI->shouldTrackRegisterForPressure(*MRI, Reg) ||
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
!SeenRegs.insert(Reg).second)
continue;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 252230e82f704..7dc715cc20c6e 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -329,7 +329,6 @@ class GCNRPTracker {
LaneBitmask getLastUsedLanes(Register Reg, SlotIndex Pos) const;
// Helper methods for physical register tracking
- Register getPhysRegFromUnit(MCRegUnit Unit) const;
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
// Check if all register units of Reg are currently live in PhysLiveRegs.
@@ -351,8 +350,8 @@ class GCNRPTracker {
public:
// Initialize PhysLiveRegs capacity. Must be called before first use.
- void initPhysLiveRegs(const MachineRegisterInfo &MRI_) {
- PhysLiveRegs.init(MRI_);
+ void initPhysLiveRegs(const MachineRegisterInfo &MRI) {
+ PhysLiveRegs.init(MRI);
}
// Enable physical register tracking. Should only be called when GCNTrackers
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index e4890f952a50f..409509120c32d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3781,126 +3781,6 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
return RC && isAGPRClass(RC);
}
-// Helper used by GCN trackers to check if a physical register should be tracked
-// for pressure.
-bool SIRegisterInfo::isReservedSpecialRegister(Register Reg) {
- if (!Reg.isPhysical())
- return false;
-
- // Check all special-purpose registers that are reserved in getReservedRegs().
- // These registers don't contribute to general register pressure and
- // correspond to register classes with GeneratePressureSet = 0 in .td files.
- switch (Reg.id()) {
- // MODE - floating point mode register
- case AMDGPU::MODE:
-
- // EXEC - execution mask (and sub-registers)
- case AMDGPU::EXEC:
- case AMDGPU::EXEC_LO:
- case AMDGPU::EXEC_HI:
-
- // FLAT_SCR - flat scratch address (and sub-registers)
- case AMDGPU::FLAT_SCR:
- case AMDGPU::FLAT_SCR_LO:
- case AMDGPU::FLAT_SCR_HI:
-
- // M0 - memory operation descriptor
- case AMDGPU::M0:
-
- // VCC - condition code register (and sub-registers)
- case AMDGPU::VCC:
- case AMDGPU::VCC_LO:
- case AMDGPU::VCC_HI:
-
- // SCC - scalar condition code
- case AMDGPU::SCC:
-
- // Special source operands
- case AMDGPU::SRC_VCCZ:
- case AMDGPU::SRC_EXECZ:
- case AMDGPU::SRC_SCC:
-
- // Memory aperture registers
- case AMDGPU::SRC_SHARED_BASE:
- case AMDGPU::SRC_SHARED_LIMIT:
- case AMDGPU::SRC_PRIVATE_BASE:
- case AMDGPU::SRC_PRIVATE_LIMIT:
- case AMDGPU::SRC_FLAT_SCRATCH_BASE_LO:
- case AMDGPU::SRC_FLAT_SCRATCH_BASE_HI:
-
- // Async counter pseudo registers
- case AMDGPU::ASYNCcnt:
- case AMDGPU::TENSORcnt:
-
- // Other special registers
- case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
- case AMDGPU::LDS_DIRECT:
-
- // XNACK_MASK (and sub-registers) - page fault handling
- case AMDGPU::XNACK_MASK:
- case AMDGPU::XNACK_MASK_LO:
- case AMDGPU::XNACK_MASK_HI:
-
- // Trap handler registers (TBA/TMA and their sub-registers)
- case AMDGPU::TBA:
- case AMDGPU::TBA_LO:
- case AMDGPU::TBA_HI:
- case AMDGPU::TMA:
- case AMDGPU::TMA_LO:
- case AMDGPU::TMA_HI:
-
- // Trap handler temporary registers (tuples and various register sizes)
- case AMDGPU::TTMP0_TTMP1:
- case AMDGPU::TTMP2_TTMP3:
- case AMDGPU::TTMP4_TTMP5:
- case AMDGPU::TTMP6_TTMP7:
- case AMDGPU::TTMP8_TTMP9:
- case AMDGPU::TTMP10_TTMP11:
- case AMDGPU::TTMP12_TTMP13:
- case AMDGPU::TTMP14_TTMP15:
-
- // Null register
- case AMDGPU::SGPR_NULL64:
- return true;
-
- default:
- return false;
- }
-
- // Note: Individual TTMP registers (TTMP0-TTMP15, etc.) and other
- // sub-registers are reserved via reserveRegisterTuples() in
- // getReservedRegs(), which marks all aliases as non-allocatable. They don't
- // need explicit checks here since shouldTrackRegisterForPressure() filters
- // non-allocatable registers.
-}
-
-// Helper to check if a physical register should be tracked for pressure by GCN
-// trackers. Returns true for actual SGPRs/VGPRs/AGPRs, false for special
-// registers (VCC, EXEC, M0, etc.).
-//
-// The generic RegPressureTracker avoids counting special registers through
-// pressure sets: register classes with "GeneratePressureSet = 0" in the .td
-// files don't contribute to pressure. Since GCNRPTracker counts registers
-// directly, we need to explicitly filter special registers to match the generic
-// tracker's behavior.
-//
-// This list corresponds to special registers that:
-// 1. Are in register classes with GeneratePressureSet = 0 (see
-// SIRegisterInfo.td)
-// 2. Are reserved in getReservedRegs() (see SIRegisterInfo.cpp)
-// 3. Shouldn't count toward actual SGPR/VGPR/AGPR pressure
-bool SIRegisterInfo::shouldTrackRegisterForPressure(
- const MachineRegisterInfo &MRI, Register Reg) const {
- // Only track physical, allocatable registers
- if (!Reg.isPhysical() || !MRI.isAllocatable(Reg))
- return false;
-
- if (isReservedSpecialRegister(Reg))
- return false;
-
- return isSGPRReg(MRI, Reg) || isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
-}
-
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index a3cadef3e6bc1..2e2916f68f584 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -332,17 +332,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
}
- /// Returns true if the register is a special-purpose/reserved register that
- /// appears in getReservedRegs(). These registers don't contribute to general
- /// register pressure (e.g., VCC, EXEC, M0, trap handler registers, etc.).
- static bool isReservedSpecialRegister(Register Reg);
-
- /// Returns true if the register should contribute to register pressure
- /// tracking for scheduling purposes. Filters out special-purpose registers
- /// that don't count toward SGPR/VGPR/AGPR limits (e.g., VCC, EXEC, M0).
- bool shouldTrackRegisterForPressure(const MachineRegisterInfo &MRI,
- Register Reg) const;
-
// FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
// (such as VCC) which hold a wave-wide vector of boolean values. Examining
// just the register class is not suffcient; it needs to be combined with a
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index ce076faa91fd5..86fff3029c735 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -104,8 +104,8 @@ entry:
; Test virtual and physical register overlap
; GCN-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 44, LVGPR WT: 0, LSGPR WT: 18
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 42, LVGPR WT: 0, LSGPR WT: 18
; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_overlap
; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
>From 33a9d69e05a4a4d0785b37b70e017851af787a05 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Sat, 14 Feb 2026 09:37:18 -0600
Subject: [PATCH 4/6] [AMDGPU] Register pressure computation fix.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 40 +++++++++++--------
.../AMDGPU/materialize-frame-index-sgpr.ll | 8 ++--
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 4 +-
3 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 1db79a2329674..92db189736d40 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -51,8 +51,32 @@ void GCNRegPressure::inc(unsigned Reg,
LaneBitmask PrevMask,
LaneBitmask NewMask,
const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ const TargetRegisterClass *RC;
+ if (Register(Reg).isVirtual()) {
+ RC = MRI.getRegClass(Reg);
+ } else {
+ if (!MRI.isAllocatable(Reg))
+ return;
+ RC = TRI->getMinimalPhysRegClass(Reg);
+ if (!RC)
+ return;
+ }
+
+ unsigned RegKind = getRegKind(RC, STI);
unsigned NewNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(NewMask);
unsigned PrevNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(PrevMask);
+ // If multiple bits are set in the input masks for physical SGPRs, the
+ // expected result does not match what getNumCoveredRegs returns. This is
+ // because it returns the number of vector lanes, not the number of 32-bit
+ // regs. Hence, cap to the register's actual size so e.g. a 32-bit SGPR counts
+ // as 1 and VCC (64-bit) counts as 2, not 32.
+ if (Register(Reg).isPhysical() && RegKind == SGPR) {
+ unsigned MaxCovered = TRI->getRegSizeInBits(*RC) / 32;
+ NewNumCoveredRegs = std::min(NewNumCoveredRegs, MaxCovered);
+ PrevNumCoveredRegs = std::min(PrevNumCoveredRegs, MaxCovered);
+ }
if (NewNumCoveredRegs == PrevNumCoveredRegs)
return;
@@ -65,22 +89,6 @@ void GCNRegPressure::inc(unsigned Reg,
assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
"prev mask should always be lesser than new");
- const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
- const TargetRegisterClass *RC;
- if (Register(Reg).isVirtual()) {
- RC = MRI.getRegClass(Reg);
- } else {
- // For physical registers, skip non-allocatable registers (reserved,
- // special, etc.).
- if (!MRI.isAllocatable(Reg))
- return;
- // For physical registers, get the minimal register class.
- RC = TRI->getMinimalPhysRegClass(Reg);
- if (!RC)
- return;
- }
- unsigned RegKind = getRegKind(RC, STI);
if (TRI->getRegSizeInBits(*RC) != 32) {
// Reg is from a tuple register class.
if (PrevMask.none()) {
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index d076d8ab55d50..810f478b3f12a 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -1872,7 +1872,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
-; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
@@ -1932,7 +1932,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
-; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
@@ -1991,7 +1991,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill
; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
-; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
@@ -2056,7 +2056,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
-; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 86fff3029c735..7cdd10d5d6993 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -104,8 +104,8 @@ entry:
; Test virtual and physical register overlap
; GCN-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 44, LVGPR WT: 0, LSGPR WT: 18
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 42, LVGPR WT: 0, LSGPR WT: 18
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 18
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 18
; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_overlap
; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
>From d456e8af8c64fd74283221d39f7263c1d32f4a72 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 20 Feb 2026 12:44:05 -0600
Subject: [PATCH 5/6] Use divideCeil to take care of registers smaller than
32 bits in size.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 92db189736d40..f64c99665040b 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -73,7 +74,7 @@ void GCNRegPressure::inc(unsigned Reg,
// regs. Hence, cap to the register's actual size so e.g. a 32-bit SGPR counts
// as 1 and VCC (64-bit) counts as 2, not 32.
if (Register(Reg).isPhysical() && RegKind == SGPR) {
- unsigned MaxCovered = TRI->getRegSizeInBits(*RC) / 32;
+ unsigned MaxCovered = divideCeil(TRI->getRegSizeInBits(*RC), 32);
NewNumCoveredRegs = std::min(NewNumCoveredRegs, MaxCovered);
PrevNumCoveredRegs = std::min(PrevNumCoveredRegs, MaxCovered);
}
>From 04e4650a185d7e4b2ba463a4d03dd60469e322c6 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Sat, 21 Feb 2026 12:51:26 -0600
Subject: [PATCH 6/6] Added an option to disable physical register tracking in
GCN trackers.
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 14 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 149 ++++++++++++++++++
2 files changed, 159 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index f69be8432b5eb..ab457a503b5fb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -69,6 +69,12 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));
+static cl::opt<bool> TrackPhysRegInTrackers(
+ "amdgpu-trackers-physical-register-tracking", cl::Hidden,
+ cl::desc("When using GCN trackers, count physical registers (e.g. from "
+ "inline asm) in pressure."),
+ cl::init(true));
+
static cl::opt<unsigned> PendingQueueLimit(
"amdgpu-scheduler-pending-queue-limit", cl::Hidden,
cl::desc(
@@ -971,7 +977,7 @@ GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
GCNDownwardRPTracker RPTracker(*LIS);
RPTracker.initPhysLiveRegs(MF.getRegInfo());
- if (GCNTrackers)
+ if (GCNTrackers && TrackPhysRegInTrackers)
RPTracker.enablePhysTracking();
RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
&LiveIns[RegionIdx]);
@@ -988,7 +994,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
const MachineBasicBlock *MBB) {
GCNDownwardRPTracker RPTracker(*LIS);
RPTracker.initPhysLiveRegs(MF.getRegInfo());
- if (GCNTrackers)
+ if (GCNTrackers && TrackPhysRegInTrackers)
RPTracker.enablePhysTracking();
// If the block has the only successor then live-ins of that successor are
@@ -1140,7 +1146,7 @@ void GCNScheduleDAGMILive::runSchedStages() {
// Initialize physical register tracking in GCN trackers.
S.getDownwardTracker()->initPhysLiveRegs(MF.getRegInfo());
S.getUpwardTracker()->initPhysLiveRegs(MF.getRegInfo());
- if (GCNTrackers) {
+ if (GCNTrackers && TrackPhysRegInTrackers) {
S.getDownwardTracker()->enablePhysTracking();
S.getUpwardTracker()->enablePhysTracking();
}
@@ -2152,7 +2158,7 @@ void PreRARematStage::rematerialize() {
} else {
GCNDownwardRPTracker RPT(*DAG.LIS);
RPT.initPhysLiveRegs(DAG.MRI);
- if (GCNTrackers)
+ if (GCNTrackers && TrackPhysRegInTrackers)
RPT.enablePhysTracking();
auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
DAG.Regions[I].second);
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 7cdd10d5d6993..084acee121f78 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -1,7 +1,9 @@
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 < %s | FileCheck --check-prefix=GCN-NOPHYS %s
; REQUIRES: asserts
; Test that GCN trackers correctly track physical register pressure from inline asm
@@ -14,6 +16,10 @@
; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG-LABEL: test_single_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
@@ -31,6 +37,10 @@ entry:
; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG-LABEL: test_multiple_physregs
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
@@ -48,6 +58,10 @@ entry:
; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
@@ -62,6 +76,8 @@ entry:
; GENERIC-DEBUG-LABEL: test_no_inflation
+; GCN-NOPHYS-DEBUG-LABEL: test_no_inflation
+
define amdgpu_kernel void @test_no_inflation() {
entry:
ret void
@@ -77,6 +93,10 @@ entry:
; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG-LABEL: test_early_clobber
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) {
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"()
@@ -94,6 +114,10 @@ entry:
; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_input
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
entry:
%val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"()
@@ -111,6 +135,10 @@ entry:
; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+
define amdgpu_kernel void @test_vreg_and_physreg_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
@@ -242,6 +270,127 @@ entry:
; GCN: TotalNumSgprs: 14
; GCN: NumVgprs: 2
+; Verify assembly output with GCN trackers enabled but physical register tracking disabled (expected to match the GCN output)
+; GCN-NOPHYS-LABEL: test_single_physreg:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_single_physreg.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_multiple_physregs:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_multiple_physregs.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_physreg_with_vreg:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_physreg_with_vreg.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_no_inflation:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_no_inflation.numbered_sgpr, 0
+; GCN-NOPHYS: TotalNumSgprs: 0
+; GCN-NOPHYS: NumVgprs: 0
+
+; GCN-NOPHYS-LABEL: test_early_clobber:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_early_clobber.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_physreg_input:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_physreg_input.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: s_mov_b32 s0, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s1, s3
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s11, s7
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 14
+; GCN-NOPHYS: NumVgprs: 2
+
; Verify assembly output without GCN trackers (should be identical)
; NO-GCN-LABEL: test_single_physreg:
; NO-GCN-NEXT: ; %bb.0:
More information about the llvm-commits
mailing list