[llvm-branch-commits] [llvm] [2/3][AMDGPU] Physical register tracking in GCN trackers. (PR #184275)
Dhruva Chakrabarti via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 12 22:25:24 PDT 2026
https://github.com/dhruvachak updated https://github.com/llvm/llvm-project/pull/184275
>From 2363eca9f99ed1e1bb0c32b915ac8d48c144bbe9 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 27 Feb 2026 13:13:15 -0600
Subject: [PATCH 01/25] [AMDGPU] Physical register tracking in GCN trackers.
Previously, the GCN trackers only monitored virtual registers, leading to
inaccurate register pressure estimates and sub-optimal scheduling decisions
when physical registers were present. This patch adds support for tracking
physical registers in the GCN trackers. Virtual and physical register
tracking are maintained separately: alongside the existing virtual LiveRegs,
a set of physical LiveRegs is now maintained. Physical register tracking
closely follows the existing virtual register flow in the recede, advance,
and pressure-increment methods.
Tracking physical registers leads to better register allocation, eliminates
allocation failures, and gives more accurate pressure estimates. Existing
tests have been updated to reflect this. A new test,
schedule-gcn-physreg-pressure.ll, has been added to validate physical
register tracking across multiple scenarios.
Assisted-by: Cursor
---
llvm/include/llvm/CodeGen/RegisterPressure.h | 14 +
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 9 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 297 +++-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 120 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 87 +-
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 2 +-
.../machine-scheduler-sink-trivial-remats.mir | 94 +-
.../AMDGPU/materialize-frame-index-sgpr.ll | 1558 +++++++++++++++++
.../schedule-amdgpu-tracker-physreg-crash.ll | 12 +-
.../AMDGPU/schedule-amdgpu-tracker-physreg.ll | 32 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 513 ++++++
11 files changed, 2587 insertions(+), 151 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index 7485be6dcb351..01a944f386014 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -293,6 +293,20 @@ class LiveRegSet {
}
public:
+ LiveRegSet() = default;
+
+  /// Replace this set's contents with a copy of \p Other's live registers.
+  /// Precondition: init() has already been called on both sets.
+  LiveRegSet &operator=(const LiveRegSet &Other) {
+    // Self-assignment must be a no-op: clearing first would drop the entries.
+    if (this == &Other)
+      return *this;
+
+    NumRegUnits = Other.NumRegUnits;
+    Regs.clear();
+    for (const IndexMaskPair &Entry : Other.Regs)
+      Regs.insert(Entry);
+    return *this;
+  }
+
LLVM_ABI void clear();
LLVM_ABI void init(const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index f2e91e9c1be70..09147452ef4a5 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -238,11 +238,8 @@ class SchedStrategyStub : public MachineSchedStrategy {
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
StrategyKind S)
- : BaseClass(C, std::make_unique<SchedStrategyStub>())
- , Context(C)
- , Strategy(S)
- , UPTracker(*LIS) {
-}
+    // UPTracker reads MRI through the parameter C rather than through the
+    // Context member, so its initializer does not depend on the order in which
+    // Context and UPTracker are declared in the class.
+    : BaseClass(C, std::make_unique<SchedStrategyStub>()), Context(C),
+      Strategy(S), UPTracker(*LIS, C->MF->getRegInfo()) {}
// returns max pressure for a region
GCNRegPressure
@@ -281,7 +278,7 @@ template <typename Range> GCNRegPressure
GCNIterativeScheduler::getSchedulePressure(const Region &R,
Range &&Schedule) const {
auto const BBEnd = R.Begin->getParent()->end();
- GCNUpwardRPTracker RPTracker(*LIS);
+ GCNUpwardRPTracker RPTracker(*LIS, MF.getRegInfo());
if (R.End != BBEnd) {
// R.End points to the boundary instruction but the
// schedule doesn't include it
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index c05e37ed44f4e..3e619ef8ba8ba 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,8 +14,10 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -50,8 +52,32 @@ void GCNRegPressure::inc(unsigned Reg,
LaneBitmask PrevMask,
LaneBitmask NewMask,
const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ const TargetRegisterClass *RC;
+ if (Register(Reg).isVirtual()) {
+ RC = MRI.getRegClass(Reg);
+ } else {
+ if (!MRI.isAllocatable(Reg))
+ return;
+ RC = TRI->getMinimalPhysRegClass(Reg);
+ if (!RC)
+ return;
+ }
+
+ unsigned RegKind = getRegKind(RC, STI);
unsigned NewNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(NewMask);
unsigned PrevNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(PrevMask);
+ // If multiple bits are set in the input masks for physical SGPRs, the
+ // expected result does not match what getNumCoveredRegs returns. This is
+ // because it returns the number of vector lanes, not the number of 32-bit
+ // regs. Hence, cap to the register's actual size so e.g. a 32-bit SGPR counts
+ // as 1 and VCC (64-bit) counts as 2, not 32.
+ if (Register(Reg).isPhysical() && RegKind == SGPR) {
+ unsigned MaxCovered = divideCeil(TRI->getRegSizeInBits(*RC), 32);
+ NewNumCoveredRegs = std::min(NewNumCoveredRegs, MaxCovered);
+ PrevNumCoveredRegs = std::min(PrevNumCoveredRegs, MaxCovered);
+ }
if (NewNumCoveredRegs == PrevNumCoveredRegs)
return;
@@ -64,10 +90,6 @@ void GCNRegPressure::inc(unsigned Reg,
assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
"prev mask should always be lesser than new");
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
- unsigned RegKind = getRegKind(RC, STI);
if (TRI->getRegSizeInBits(*RC) != 32) {
// Reg is from a tuple register class.
if (PrevMask.none()) {
@@ -471,6 +493,77 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
}
+/// Return true if the cached live range of register unit \p Unit is live at
+/// \p SI. A unit with no cached live range is reported as not live.
+bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
+  if (const LiveRange *UnitRange = LIS.getCachedRegUnit(Unit))
+    return UnitRange->liveAt(SI);
+  return false;
+}
+
+/// Return true if every register unit of \p Reg currently has a non-empty
+/// lane mask in PhysLiveRegs.
+bool GCNRPTracker::allRegUnitsLive(Register Reg) const {
+  assert(MRI && "MRI not initialized");
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  for (MCRegUnit Unit : TRI->regunits(Reg)) {
+    // One untracked unit is enough to answer "no".
+    if (PhysLiveRegs.contains(VirtRegOrUnit(Unit)).none())
+      return false;
+  }
+  return true;
+}
+
+/// Return true if some register unit of \p Reg is tracked in PhysLiveRegs but
+/// is not live at \p SI. PhysLiveRegs is not modified.
+bool GCNRPTracker::checkRegKilled(Register Reg, SlotIndex SI) const {
+  assert(MRI && "MRI not initialized");
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  for (MCRegUnit Unit : TRI->regunits(Reg)) {
+    const bool TrackedLive = PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any();
+    if (TrackedLive && !isUnitLiveAt(Unit, SI))
+      return true;
+  }
+  return false;
+}
+
+/// Erase from PhysLiveRegs every unit of \p Reg that is tracked as live but
+/// is not live at \p SI. Returns true if at least one unit was erased.
+bool GCNRPTracker::eraseKilledUnits(Register Reg, SlotIndex SI) {
+  assert(MRI && "MRI not initialized");
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  bool ErasedAny = false;
+  for (MCRegUnit Unit : TRI->regunits(Reg)) {
+    VirtRegOrUnit VRU(Unit);
+    // Skip units that are not tracked, or that are still live at SI.
+    if (PhysLiveRegs.contains(VRU).none() || isUnitLiveAt(Unit, SI))
+      continue;
+    PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+    ErasedAny = true;
+  }
+  return ErasedAny;
+}
+
+/// Erase every unit of \p Reg that is currently tracked in PhysLiveRegs.
+/// Returns true if at least one unit was tracked (and therefore erased).
+bool GCNRPTracker::eraseAllLiveUnits(Register Reg) {
+  assert(MRI && "MRI not initialized");
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  bool ErasedAny = false;
+  for (MCRegUnit Unit : TRI->regunits(Reg)) {
+    VirtRegOrUnit VRU(Unit);
+    // Units that are not tracked need no work.
+    if (PhysLiveRegs.contains(VRU).none())
+      continue;
+    PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+    ErasedAny = true;
+  }
+  return ErasedAny;
+}
+
+/// Insert into PhysLiveRegs every unit of \p Reg that is not tracked yet.
+/// Returns true if at least one unit was missing (and therefore inserted).
+bool GCNRPTracker::insertAllNotLiveUnits(Register Reg) {
+  assert(MRI && "MRI not initialized");
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  bool InsertedAny = false;
+  for (MCRegUnit Unit : TRI->regunits(Reg)) {
+    VirtRegOrUnit VRU(Unit);
+    // Units that are already tracked are left as they are.
+    if (PhysLiveRegs.contains(VRU).any())
+      continue;
+    PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+    InsertedAny = true;
+  }
+  return InsertedAny;
+}
+
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter) {
@@ -488,10 +581,10 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
return LiveMask;
}
-GCNRPTracker::LiveRegSet
-llvm::getVirtLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI,
- GCNRegPressure::RegKind RegKind) {
+GCNRPTracker::LiveRegSet llvm::getVirtLiveRegs(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind) {
GCNRPTracker::LiveRegSet VirtLiveRegs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
auto Reg = Register::index2VirtReg(I);
@@ -520,6 +613,15 @@ void GCNRPTracker::reset(const MachineInstr &MI,
}
MaxPressure = CurPressure = getVirtRegPressure(*MRI, VirtLiveRegs);
+
+ setPhysRegTracking();
+ // Clear physical register tracking (only if enabled)
+ if (TrackPhysRegs) {
+ PhysLiveRegs.clear();
+ PhysLiveRegs.init(*MRI);
+ MaxPhysPressure.clear();
+ CurPhysPressure.clear();
+ }
}
void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
@@ -528,6 +630,15 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
VirtLiveRegs = VirtLiveRegsSet;
LastTrackedMI = nullptr;
MaxPressure = CurPressure = getVirtRegPressure(MRInfo, VirtLiveRegsSet);
+
+ setPhysRegTracking();
+ // Clear physical register tracking (only if enabled)
+ if (TrackPhysRegs) {
+ PhysLiveRegs.clear();
+ PhysLiveRegs.init(*MRI);
+ MaxPhysPressure.clear();
+ CurPhysPressure.clear();
+ }
}
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
@@ -600,6 +711,45 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
: max(CurPressure, MaxPressure);
+ // Track physical register defs and uses (only if enabled).
+ if (TrackPhysRegs) {
+ // Kill physical register defs (moving backward in upward tracking).
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!MRI->isAllocatable(Reg))
+ continue;
+
+ // Check if any unit of this register was live before and erase them.
+ bool WasLive = eraseAllLiveUnits(Reg);
+
+ // Update pressure once per register if it was live.
+ if (WasLive)
+ CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+
+ // Make physical register uses alive (moving backward in upward tracking).
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!MRI->isAllocatable(Reg))
+ continue;
+ // Check if any unit of this register was not live before and insert them.
+ bool WasNotLive = insertAllNotLiveUnits(Reg);
+
+ // Update pressure once per register if it wasn't live before.
+ if (WasNotLive) {
+ CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
+ }
+
assert(CurPressure == getVirtRegPressure(*MRI, VirtLiveRegs));
}
@@ -678,7 +828,29 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
}
}
+ // Track physical register kills (only if enabled).
+ if (TrackPhysRegs) {
+ // Iterate over actual instruction operands to track which registers die.
+ SmallSet<Register, 8> SeenRegs;
+ for (const auto &MO : CurrMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!MRI->isAllocatable(Reg) || !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is killed and erase killed units.
+ bool IsKilled = eraseKilledUnits(Reg, SI);
+
+ // Update pressure once per register if it was live and is now killed.
+ if (IsKilled)
+ CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+ }
+
MaxPressure = max(MaxPressure, CurPressure);
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
LastTrackedMI = nullptr;
@@ -696,7 +868,7 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
const MachineInstr *CurrMI = LastTrackedMI;
- // Add new registers or mask bits.
+ // Add new registers or mask bits (virtual registers).
for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
@@ -707,7 +879,33 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
}
+ // Add new physical register defs (only if enabled).
+ if (TrackPhysRegs) {
+ for (const auto &MO : CurrMI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg))
+ continue;
+
+ // Check if any unit of this register was not live before.
+ bool WasNotLive = false;
+ for (MCRegUnit Unit : MRI->getTargetRegisterInfo()->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.none())
+ WasNotLive = true;
+ // Mark unit as live
+ PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+
+ // Update pressure once per register if it wasn't live before.
+ if (WasNotLive)
+ CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
MaxPressure = max(MaxPressure, CurPressure);
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
}
bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator) {
@@ -774,7 +972,9 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
GCNRegPressure TempPressure = CurPressure;
+ GCNRegPressure TempPhysPressure = CurPhysPressure;
+ // Process virtual register uses
for (const VRegMaskOrUnit &Use : RegOpers.Uses) {
if (!Use.VRegOrUnit.isVirtualReg())
continue;
@@ -809,7 +1009,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
}
- // Generate liveness for defs.
+ // Generate liveness for virtual register defs.
for (const VRegMaskOrUnit &Def : RegOpers.Defs) {
if (!Def.VRegOrUnit.isVirtualReg())
continue;
@@ -821,7 +1021,48 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
}
- return TempPressure;
+ // Process physical registers (only if enabled).
+ if (TrackPhysRegs) {
+ SmallSet<Register, 8> SeenRegs;
+
+ // Process physical register defs.
+ for (const auto &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
+ !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is not currently live.
+ bool WasNotLive = !allRegUnitsLive(Reg);
+
+ if (WasNotLive && !MO.isDead()) {
+ TempPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
+ // Process physical register uses to find kills.
+ SeenRegs.clear();
+ for (const auto &MO : MI->uses()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
+ !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is killed.
+ bool IsKilled = checkRegKilled(Reg, SlotIdx);
+
+ if (IsKilled) {
+ TempPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+ }
+ }
+
+ // Return sum of virtual and physical pressure
+ return TempPressure + TempPhysPressure;
}
bool GCNUpwardRPTracker::isValid() const {
@@ -941,18 +1182,18 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
- GCNRPTracker::LiveRegSet VirtLiveIn, VirtLiveOut;
+ GCNRPTracker::LiveRegSet LiveIn, LiveOut;
GCNRegPressure RPAtMBBEnd;
if (UseDownwardTracker) {
if (MBB.empty()) {
- VirtLiveIn = VirtLiveOut = getVirtLiveRegs(MBBStartSlot, LIS, MRI);
- RPAtMBBEnd = getVirtRegPressure(MRI, VirtLiveIn);
+ LiveIn = LiveOut = getVirtLiveRegs(MBBStartSlot, LIS, MRI);
+ RPAtMBBEnd = getVirtRegPressure(MRI, LiveIn);
} else {
- GCNDownwardRPTracker RPT(LIS);
+ GCNDownwardRPTracker RPT(LIS, MRI);
RPT.reset(MBB.front());
- VirtLiveIn = RPT.getVirtLiveRegs();
+ LiveIn = RPT.getVirtLiveRegs();
while (!RPT.advanceBeforeNext()) {
GCNRegPressure RPBeforeMI = RPT.getPressure();
@@ -960,14 +1201,14 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RP.emplace_back(RPBeforeMI, RPT.getPressure());
}
- VirtLiveOut = RPT.getVirtLiveRegs();
+ LiveOut = RPT.getVirtLiveRegs();
RPAtMBBEnd = RPT.getPressure();
}
} else {
- GCNUpwardRPTracker RPT(LIS);
+ GCNUpwardRPTracker RPT(LIS, MRI);
RPT.reset(MRI, MBBLastSlot);
- VirtLiveOut = RPT.getVirtLiveRegs();
+ LiveOut = RPT.getVirtLiveRegs();
RPAtMBBEnd = RPT.getPressure();
for (auto &MI : reverse(MBB)) {
@@ -977,13 +1218,12 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RP.emplace_back(RPT.getPressure(), RPT.getMaxPressure());
}
- VirtLiveIn = RPT.getVirtLiveRegs();
+ LiveIn = RPT.getVirtLiveRegs();
}
- OS << PFX " Live-in: " << llvm::print(VirtLiveIn, MRI);
+ OS << PFX " Live-in: " << llvm::print(LiveIn, MRI);
if (!UseDownwardTracker)
- ReportLISMismatchIfAny(VirtLiveIn,
- getVirtLiveRegs(MBBStartSlot, LIS, MRI));
+ ReportLISMismatchIfAny(LiveIn, getVirtLiveRegs(MBBStartSlot, LIS, MRI));
OS << PFX " SGPR VGPR\n";
int I = 0;
@@ -999,14 +1239,13 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
}
OS << printRP(RPAtMBBEnd) << '\n';
- OS << PFX " Live-out:" << llvm::print(VirtLiveOut, MRI);
+ OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
if (UseDownwardTracker)
- ReportLISMismatchIfAny(VirtLiveOut,
- getVirtLiveRegs(MBBLastSlot, LIS, MRI));
+ ReportLISMismatchIfAny(LiveOut, getVirtLiveRegs(MBBLastSlot, LIS, MRI));
GCNRPTracker::LiveRegSet LiveThrough;
- for (auto [Reg, Mask] : VirtLiveIn) {
- LaneBitmask MaskIntersection = Mask & VirtLiveOut.lookup(Reg);
+ for (auto [Reg, Mask] : LiveIn) {
+ LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Reg);
if (MaskIntersection.any()) {
LaneBitmask LTMask = getRegLiveThroughMask(
MRI, LIS, Reg, MBBStartSlot, MBBLastSlot, MaskIntersection);
@@ -1036,7 +1275,7 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
unsigned MaxNumRegs = 0;
const MachineInstr *MaxPressureMI = nullptr;
- GCNUpwardRPTracker RPT(LIS);
+ GCNUpwardRPTracker RPT(LIS, MRI);
for (const MachineBasicBlock &MBB : MF) {
RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
for (const MachineInstr &MI : reverse(MBB)) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index d7aff50822b50..b3865fbade3ce 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -320,12 +320,47 @@ class GCNRPTracker {
protected:
const LiveIntervals &LIS;
+
+ // Virtual register tracking
LiveRegSet VirtLiveRegs;
GCNRegPressure CurPressure, MaxPressure;
+
+ // Physical register tracking: Maintain clean separation between virtual and
+ // physical registers. Tracking physical registers can be turned OFF with an
+ // option. Using llvm::LiveRegSet for consistency with the generic tracker.
+ llvm::LiveRegSet PhysLiveRegs;
+ GCNRegPressure CurPhysPressure, MaxPhysPressure;
+
+ // Flag to control whether physical register tracking is active.
+ // Set to true only when both GCNTrackers and TrackPhysRegInTrackers are
+ // enabled (see setPhysRegTracking()); false otherwise.
+ bool TrackPhysRegs = false;
+
const MachineInstr *LastTrackedMI = nullptr;
mutable const MachineRegisterInfo *MRI = nullptr;
- GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
+  // Bind the tracker to LIS and MRI. The physical live-register set is only
+  // initialized when physical register tracking is enabled.
+  GCNRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
+      : LIS(LIS), MRI(&MRI) {
+    setPhysRegTracking();
+    if (!TrackPhysRegs)
+      return;
+    // MRI here is the constructor parameter (a reference), not the member.
+    PhysLiveRegs.init(MRI);
+  }
+
+  // Copy constructor - PhysLiveRegs must be initialized with init() first and
+  // only then filled from Other through the assignment operator.
+  GCNRPTracker(const GCNRPTracker &Other)
+      : LIS(Other.LIS), VirtLiveRegs(Other.VirtLiveRegs),
+        CurPressure(Other.CurPressure), MaxPressure(Other.MaxPressure),
+        CurPhysPressure(Other.CurPhysPressure),
+        MaxPhysPressure(Other.MaxPhysPressure),
+        TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
+        MRI(Other.MRI) {
+    // Mirror the primary constructor and reset(): the physical live set is
+    // only set up when tracking is enabled, so a tracker with tracking
+    // disabled skips that work entirely.
+    if (TrackPhysRegs && MRI) {
+      PhysLiveRegs.init(*MRI);
+      PhysLiveRegs = Other.PhysLiveRegs;
+    }
+  }
void reset(const MachineInstr &MI, const LiveRegSet *VirtLiveRegsCopy,
bool After);
@@ -335,32 +370,76 @@ class GCNRPTracker {
LaneBitmask getLastUsedLanes(Register Reg, SlotIndex Pos) const;
+ // Helper to check if a register unit is live at a given slot index.
+ bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
+
+ // Check if all register units of Reg are currently live in PhysLiveRegs.
+ bool allRegUnitsLive(Register Reg) const;
+
+ // Check if Reg has any killed units at the given slot index.
+ bool checkRegKilled(Register Reg, SlotIndex SI) const;
+
+ // Check if Reg has any killed units and erase them from PhysLiveRegs.
+ bool eraseKilledUnits(Register Reg, SlotIndex SI);
+
+ // Erase all live units of Reg from PhysLiveRegs.
+ // Returns true if any unit was live (and thus erased).
+ bool eraseAllLiveUnits(Register Reg);
+
+ // Insert all not-live units of Reg into PhysLiveRegs.
+ // Returns true if any unit was not live (and thus inserted).
+ bool insertAllNotLiveUnits(Register Reg);
+
public:
+ // Enable physical register tracking only if both GCNTrackers and
+ // TrackPhysRegInTrackers are true.
+ void setPhysRegTracking();
+
// reset tracker and set live register set to the specified value.
void reset(const MachineRegisterInfo &MRInfo,
const LiveRegSet &VirtLiveRegsSet);
+
// Live virtual registers for the current tracker state.
const decltype(VirtLiveRegs) &getVirtLiveRegs() const { return VirtLiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
- void clearMaxPressure() { MaxPressure.clear(); }
+  // Reset both the virtual and the physical maximum-pressure records.
+  void clearMaxPressure() {
+    MaxPhysPressure.clear();
+    MaxPressure.clear();
+  }
- GCNRegPressure getPressure() const { return CurPressure; }
+ // Returns the current register pressure as the sum of the virtual
+ // (CurPressure) and physical (CurPhysPressure) components.
+ GCNRegPressure getPressure() const {
+ return CurPressure + CurPhysPressure;
+ }
+
+ // Returns only the virtual register component of the current pressure.
+ GCNRegPressure getVirtPressure() const { return CurPressure; }
+
+ // Returns only the physical register component of the current pressure.
+ GCNRegPressure getPhysPressure() const { return CurPhysPressure; }
+
+ // Returns the sum of the recorded virtual maximum (MaxPressure) and the
+ // recorded physical maximum (MaxPhysPressure). The two maxima are tracked
+ // independently, so this is the sum of two maxima, by value.
+ GCNRegPressure getMaxPressure() const {
+ return MaxPressure + MaxPhysPressure;
+ }
decltype(VirtLiveRegs) moveVirtLiveRegs() { return std::move(VirtLiveRegs); }
};
GCNRPTracker::LiveRegSet
getVirtLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI,
- GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);
////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker
class GCNUpwardRPTracker : public GCNRPTracker {
public:
- GCNUpwardRPTracker(const LiveIntervals &LIS) : GCNRPTracker(LIS) {}
+ GCNUpwardRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
+ : GCNRPTracker(LIS, MRI) {}
using GCNRPTracker::reset;
@@ -389,12 +468,13 @@ class GCNUpwardRPTracker : public GCNRPTracker {
/// to reported by LIS.
bool isValid() const;
- const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
-
- void resetMaxPressure() { MaxPressure = CurPressure; }
+  // Restart maximum tracking from the current virtual and physical pressure.
+  void resetMaxPressure() {
+    MaxPhysPressure = CurPhysPressure;
+    MaxPressure = CurPressure;
+  }
GCNRegPressure getMaxPressureAndReset() {
- GCNRegPressure RP = MaxPressure;
+ // Capture the combined (virtual + physical) maximum before
+ // resetMaxPressure() overwrites both maxima with the current pressure.
+ GCNRegPressure RP = getMaxPressure();
resetMaxPressure();
return RP;
}
@@ -410,7 +490,8 @@ class GCNDownwardRPTracker : public GCNRPTracker {
MachineBasicBlock::const_iterator MBBEnd;
public:
- GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
+ GCNDownwardRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
+ : GCNRPTracker(LIS, MRI) {}
using GCNRPTracker::reset;
@@ -418,8 +499,9 @@ class GCNDownwardRPTracker : public GCNRPTracker {
/// \p return MaxPressure and clear it.
GCNRegPressure moveMaxPressure() {
- auto Res = MaxPressure;
- MaxPressure.clear();
- return Res;
+    // Snapshot the combined (virtual + physical) maximum before wiping it.
+    GCNRegPressure Snapshot = getMaxPressure();
+    // clearMaxPressure() clears both MaxPressure and MaxPhysPressure.
+    clearMaxPressure();
+    return Snapshot;
}
@@ -492,7 +574,7 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
template <typename Range>
-DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
+DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
getVirtLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
std::vector<SlotIndex> Indexes;
Indexes.reserve(llvm::size(R));
@@ -531,20 +613,20 @@ getVirtLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
}
inline GCNRPTracker::LiveRegSet getVirtLiveRegsAfter(const MachineInstr &MI,
- const LiveIntervals &LIS) {
+ const LiveIntervals &LIS) {
return getVirtLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
- MI.getMF()->getRegInfo());
+ MI.getMF()->getRegInfo());
}
-inline GCNRPTracker::LiveRegSet
-getVirtLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS) {
+inline GCNRPTracker::LiveRegSet getVirtLiveRegsBefore(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
return getVirtLiveRegs(LIS.getInstructionIndex(MI).getBaseIndex(), LIS,
- MI.getMF()->getRegInfo());
+ MI.getMF()->getRegInfo());
}
template <typename Range>
GCNRegPressure getVirtRegPressure(const MachineRegisterInfo &MRI,
- Range &&LiveRegs) {
+ Range &&LiveRegs) {
GCNRegPressure Res;
for (const auto &RM : LiveRegs)
Res.inc(RM.first, LaneBitmask::getNone(), RM.second, MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index ad7104ceb0305..e5c34930bb29c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -78,6 +78,12 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));
+static cl::opt<bool> TrackPhysRegInTrackers(
+ "amdgpu-trackers-physical-register-tracking", cl::Hidden,
+ cl::desc("When using GCN trackers, count physical registers (e.g. from "
+ "inline asm) in pressure."),
+ cl::init(true));
+
static cl::opt<unsigned> PendingQueueLimit(
"amdgpu-scheduler-pending-queue-limit", cl::Hidden,
cl::desc(
@@ -107,14 +113,13 @@ const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
: GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
- DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
-}
+ // Both trackers now take the function's MachineRegisterInfo at construction.
+ DownwardTracker(*C->LIS, C->MF->getRegInfo()),
+ UpwardTracker(*C->LIS, C->MF->getRegInfo()), HasHighPressure(false) {}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
MF = &DAG->MF;
-
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
SGPRExcessLimit =
@@ -164,6 +169,14 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
<< ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
+/// Recompute TrackPhysRegs from the command-line options: physical register
+/// tracking is active only when both GCNTrackers and TrackPhysRegInTrackers
+/// are set.
+void GCNRPTracker::setPhysRegTracking() {
+  TrackPhysRegs = GCNTrackers && TrackPhysRegInTrackers;
+}
+
/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
///
@@ -988,7 +1001,7 @@ GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
if (Regions[RegionIdx].first == Regions[RegionIdx].second)
return llvm::getVirtRegPressure(MRI, VirtLiveIns[RegionIdx]);
- GCNDownwardRPTracker RPTracker(*LIS);
+ GCNDownwardRPTracker RPTracker(*LIS, MF.getRegInfo());
RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
&VirtLiveIns[RegionIdx]);
return RPTracker.moveMaxPressure();
@@ -1002,7 +1015,7 @@ static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
const MachineBasicBlock *MBB) {
- GCNDownwardRPTracker RPTracker(*LIS);
+ GCNDownwardRPTracker RPTracker(*LIS, MF.getRegInfo());
// If the block has the only successor then live-ins of that successor are
// live-outs of the current block. We can reuse calculated live set if the
@@ -1030,20 +1043,20 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
--CurRegion;
auto I = MBB->begin();
- auto VirtLiveInIt = MBBVirtLiveIns.find(MBB);
+ auto LiveInIt = MBBVirtLiveIns.find(MBB);
auto &Rgn = Regions[CurRegion];
auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
- if (VirtLiveInIt != MBBVirtLiveIns.end()) {
- auto VirtLiveIn = std::move(VirtLiveInIt->second);
- RPTracker.reset(*MBB->begin(), &VirtLiveIn);
- MBBVirtLiveIns.erase(VirtLiveInIt);
+ if (LiveInIt != MBBVirtLiveIns.end()) {
+ auto LiveIn = std::move(LiveInIt->second);
+ RPTracker.reset(*MBB->begin(), &LiveIn);
+ MBBVirtLiveIns.erase(LiveInIt);
} else {
I = Rgn.first;
- auto VirtLiveInSet = BBVirtLiveInMap.lookup(NonDbgMI);
+ auto LRS = BBVirtLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
- assert(isEqual(getVirtLiveRegsBefore(*NonDbgMI, *LIS), VirtLiveInSet));
+ assert(isEqual(getVirtLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
- RPTracker.reset(*I, &VirtLiveInSet);
+ RPTracker.reset(*I, &LRS);
}
for (;;) {
@@ -1104,8 +1117,8 @@ GCNScheduleDAGMILive::getRegionVirtLiveOutMap() const {
void RegionPressureMap::buildVirtLiveRegMap() {
IdxToInstruction.clear();
- RegionVirtLiveRegMap = IsLiveOut ? DAG->getRegionVirtLiveOutMap()
- : DAG->getRegionVirtLiveInMap();
+ RegionVirtLiveRegMap =
+ IsLiveOut ? DAG->getRegionVirtLiveOutMap() : DAG->getRegionVirtLiveInMap();
for (unsigned I = 0; I < DAG->Regions.size(); I++) {
auto &[RegionBegin, RegionEnd] = DAG->Regions[I];
// Skip empty regions.
@@ -1135,7 +1148,6 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
void GCNScheduleDAGMILive::runSchedStages() {
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
-
if (!Regions.empty()) {
BBVirtLiveInMap = getRegionVirtLiveInMap();
if (GCNTrackers)
@@ -1169,11 +1181,11 @@ void GCNScheduleDAGMILive::runSchedStages() {
if (GCNTrackers) {
GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
- GCNRPTracker::LiveRegSet *RegionVirtLiveIns =
+ GCNRPTracker::LiveRegSet *RegionLiveIns =
&VirtLiveIns[Stage->getRegionIdx()];
reinterpret_cast<GCNRPTracker *>(DownwardTracker)
- ->reset(MRI, *RegionVirtLiveIns);
+ ->reset(MRI, *RegionLiveIns);
reinterpret_cast<GCNRPTracker *>(UpwardTracker)
->reset(MRI, RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(
Stage->getRegionIdx()));
@@ -1405,9 +1417,9 @@ Printable PreRARematStage::ScoredRemat::print() const {
#endif
bool PreRARematStage::initGCNSchedStage() {
- // FIXME: This pass will invalidate cached BBVirtLiveInMap and MBBVirtLiveIns
- // for regions inbetween the defs and region we sinked the def to. Will need
- // to be fixed if there is another pass after this pass.
+ // FIXME: This pass will invalidate cached BBVirtLiveInMap and MBBVirtLiveIns
+ // for regions in between the defs and region we sank the def to. Will need
+ // to be fixed if there is another pass after this pass.
assert(!S.hasNextStage());
if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
@@ -1712,12 +1724,12 @@ bool GCNSchedStage::initGCNRegion() {
PressureBefore = DAG.Pressure[RegionIdx];
- LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"
- << print(DAG.VirtLiveIns[RegionIdx], DAG.MRI)
- << "Region live-in pressure: "
- << print(llvm::getVirtRegPressure(
- DAG.MRI, DAG.VirtLiveIns[RegionIdx]))
- << "Region register pressure: " << print(PressureBefore));
+ LLVM_DEBUG(
+ dbgs() << "Pressure before scheduling:\nRegion live-ins:"
+ << print(DAG.VirtLiveIns[RegionIdx], DAG.MRI)
+ << "Region live-in pressure: "
+ << print(llvm::getVirtRegPressure(DAG.MRI, DAG.VirtLiveIns[RegionIdx]))
+ << "Region register pressure: " << print(PressureBefore));
S.HasHighPressure = false;
S.KnownExcessRP = isRegionWithExcessRP();
@@ -2724,12 +2736,11 @@ bool RewriteMFMAFormStage::rewrite(
// Bulk update the LIS.
DAG.LIS->reanalyze(DAG.MF);
// Liveins may have been modified for cross RC copies
- RegionPressureMap VirtLiveInUpdater(&DAG, false);
- VirtLiveInUpdater.buildVirtLiveRegMap();
+ RegionPressureMap LiveInUpdater(&DAG, false);
+ LiveInUpdater.buildVirtLiveRegMap();
for (unsigned Region = 0; Region < DAG.Regions.size(); Region++)
- DAG.VirtLiveIns[Region] =
- VirtLiveInUpdater.getVirtLiveRegsForRegionIdx(Region);
+ DAG.VirtLiveIns[Region] = LiveInUpdater.getVirtLiveRegsForRegionIdx(Region);
DAG.Pressure[RegionIdx] = DAG.getRealRegPressure(RegionIdx);
@@ -2854,13 +2865,11 @@ PreRARematStage::RematReg::RematReg(
// Mark regions in which the rematerializable register is live.
Register Reg = getReg();
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- auto VirtLiveInIt = DAG.VirtLiveIns[I].find(Reg);
- if (VirtLiveInIt != DAG.VirtLiveIns[I].end())
+ auto LiveInIt = DAG.VirtLiveIns[I].find(Reg);
+ if (LiveInIt != DAG.VirtLiveIns[I].end())
LiveIn.set(I);
- const auto &VirtLiveOuts =
- DAG.RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(I);
- if (auto VirtLiveOutIt = VirtLiveOuts.find(Reg);
- VirtLiveOutIt != VirtLiveOuts.end())
+ const auto &LiveOuts = DAG.RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(I);
+ if (auto LiveOutIt = LiveOuts.find(Reg); LiveOutIt != LiveOuts.end())
LiveOut.set(I);
}
Live |= LiveIn;
@@ -3006,8 +3015,8 @@ MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
if (LI.hasSubRanges() && MO.getSubReg())
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
- LaneBitmask VirtLiveInMask = DAG.VirtLiveIns[Remat->UseRegion].at(UseReg);
- LaneBitmask UncoveredLanes = LM & ~(VirtLiveInMask & LM);
+ LaneBitmask LiveInMask = DAG.VirtLiveIns[Remat->UseRegion].at(UseReg);
+ LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
// If this register has lanes not covered by the VirtLiveIns, be sure they
// do not map to any subrange. ref:
// machine-scheduler-sink-trivial-remats.mir::omitted_subrange
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index a44b28de507fc..4e3a55d5a79f7 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -275,7 +275,7 @@ bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
"amdgpu-max-memory-clause", MaxClause);
for (MachineBasicBlock &MBB : MF) {
- GCNDownwardRPTracker RPT(*LIS);
+ GCNDownwardRPTracker RPT(*LIS, *MRI);
MachineBasicBlock::instr_iterator Next;
for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
MachineInstr &MI = *I;
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 6dcd78ee52f5e..059d930dc8e45 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -11164,73 +11164,73 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
; GFX908-GCNTRACKERS-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]]
; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -11508,18 +11508,18 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-GCNTRACKERS-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 481eb1bc3d91a..810f478b3f12a 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -7,6 +7,14 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX7-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX8-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefixes=GFX900-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefixes=GFX942-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX10_1-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX10_3-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX11-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX12-GCNTRACKERS %s
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
<16 x i32>, <7 x i32>, ; vgprs
@@ -563,6 +571,540 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX7-GCNTRACKERS-NEXT: v_add_i32_e32 v0, vcc, 64, v0
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX7-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, 0x4040
+; GFX7-GCNTRACKERS-NEXT: v_mad_u32_u24 v0, v0, 64, s32
+; GFX7-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v0, 6, v0
+; GFX7-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX8-GCNTRACKERS-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX8-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, 0x4040
+; GFX8-GCNTRACKERS-NEXT: v_mad_u32_u24 v0, v0, 64, s32
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v0, 6, v0
+; GFX8-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 0x4040, v0
+; GFX900-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
+; GFX10_1-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v24
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
+; GFX10_3-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v24
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX12-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: s_add_co_ci_u32 s59, s32, 0x4000
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX12-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
@@ -1084,6 +1626,485 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_mad_u32_u24 v22, 16, 64, s32
+; GFX7-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v22, 6, v22
+; GFX7-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_mad_u32_u24 v22, 16, 64, s32
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v22, 6, v22
+; GFX8-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 6, s32
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v22, 16, v22
+; GFX900-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4010
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 16
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4010
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v22, 16, v22
+; GFX10_1-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v22, 16, v22
+; GFX10_3-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4010
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 16
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4010
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s32
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
; Force no SGPRs to be available for the carry-out of the vector add.
@@ -1660,6 +2681,543 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s28, 17
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s29, 18
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX7-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX7-GCNTRACKERS-NEXT: v_add_i32_e32 v0, vcc, 64, v0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s28, v23, 17
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s29, v23, 18
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX8-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX8-GCNTRACKERS-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX900-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s58, s4, 0x4240
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s58, s4, 0x4240
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX11-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX12-GCNTRACKERS-NEXT: s_add_co_i32 s58, s32, 0x4200
+; GFX12-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
index f70cd6816a966..b2b73e9a96fcb 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
@@ -1,5 +1,6 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 2>&1 < %s | FileCheck -check-prefixes=ERR-GCNTRACKERS %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack 2>&1 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 2>&1 < %s | FileCheck -check-prefixes=GCN-TRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack 2>&1 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 2>&1 < %s | FileCheck --check-prefix=GCN-NOPHYS-FAIL %s
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
<16 x i32>, <7 x i32>, ; vgprs
@@ -16,10 +17,13 @@
i64 ; vcc
}
-; ERR-GCNTRACKERS: ran out of registers during register allocation
+; GCN-TRACKERS-NOT: ran out of registers during register allocation
; GCN-NOT: ran out of registers during register allocation
+; GCN-NOPHYS-FAIL: ran out of registers during register allocation
-; FIXME: GCN Trackers do not track pressure from PhysRegs, so scheduling is actually worse
+; GCN Trackers now track physical register pressure, so this test verifies that
+; both trackers can successfully handle code with heavy physical register usage
+; from inline assembly; with physical register tracking disabled it still fails.
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 {
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 0d25bc97ff775..0d81a11243ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -1,17 +1,31 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN-GCNTRACKERS %s
-
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED-GCNTRACKERS %s
+; REQUIRES: asserts
; CHECK-LABEL: {{^}}spill:
; GCN: NumSgprs: 104
; GCN-GCNTRACKERS: NumSgprs: 104
; GCN: NumVgprs: 1
-; GCN-GCNTRACKERS: NumVgprs: 2
+; GCN-GCNTRACKERS: NumVgprs: 1
; GCN: ScratchSize: 0
; GCN-GCNTRACKERS: ScratchSize: 0
; GCN: Occupancy: 5
; GCN-GCNTRACKERS: Occupancy: 5
-
-; FIXME: GCN Trackers do not track pressure from PhysRegs, so scheduling is actually worse
+;
+; Check scheduling pressure values:
+; SCHED-LABEL: spill:%bb.0 entry
+; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
+; SCHED: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 97
+;
+; SCHED-GCNTRACKERS-LABEL: spill:%bb.0 entry
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 99
+; SCHED-GCNTRACKERS: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 98
+;
+; NOTE: GCN Trackers now track pressure from both virtual and physical registers.
+; The GCN tracker now matches the generic tracker's VGPR count (1 VGPR).
+; The SGPR pressure after scheduling is still slightly higher (98 vs 97) due to summing
+; physical register pressure from inline asm constraints with virtual register pressure.
define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 {
entry:
@@ -247,9 +261,15 @@ bb3:
; GCN: NumSgprs: 104
; GCN-GCNTRACKERS: NumSgprs: 104
; GCN: NumVgprs: 2
-; GCN-GCNTRACKERS: NumVgprs: 3
+; GCN-GCNTRACKERS: NumVgprs: 2
; GCN: ScratchSize: 8
-; GCN-GCNTRACKERS: ScratchSize: 12
+; GCN-GCNTRACKERS: ScratchSize: 8
+;
+; SCHED-LABEL: spill_func:%bb.0 entry
+; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 97
+;
+; SCHED-GCNTRACKERS-LABEL: spill_func:%bb.0 entry
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
define void @spill_func(ptr addrspace(1) %arg) #0 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
new file mode 100644
index 0000000000000..084acee121f78
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -0,0 +1,513 @@
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 < %s | FileCheck --check-prefix=GCN-NOPHYS %s
+; REQUIRES: asserts
+
+; Test that GCN trackers correctly track physical register pressure from inline asm
+
+; GCN-DEBUG-LABEL: test_single_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_single_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_single_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"() ; single output constrained to physical SGPR s10; %val is never read
+ store i32 0, ptr addrspace(1) %out ; stores a constant, so the store does not depend on the asm result
+ ret void
+}
+
+; Test multiple physical registers
+
+; GCN-DEBUG-LABEL: test_multiple_physregs
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_multiple_physregs
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_multiple_physregs
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"() ; two outputs constrained to physical SGPRs s10 and s11; %result is never read
+ store i32 0, ptr addrspace(1) %out ; stores a constant, so the store does not depend on the asm results
+ ret void
+}
+
+; Test physical register with virtual registers
+
+; GCN-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_physreg_with_vreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+entry:
+ %asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"() ; output constrained to physical SGPR s10; %asm_val is never read
+ %val = load i32, ptr addrspace(1) %in ; ordinary IR value, independent of the asm output
+ store i32 %val, ptr addrspace(1) %out ; copies the loaded value from %in to %out
+ ret void
+}
+
+; Test that we don't inflate pressure when not using GCN trackers (empty kernel; only the function labels are checked, no pressure values)
+
+; GCN-DEBUG-LABEL: test_no_inflation
+
+; GENERIC-DEBUG-LABEL: test_no_inflation
+
+; GCN-NOPHYS-DEBUG-LABEL: test_no_inflation
+
+define amdgpu_kernel void @test_no_inflation() {
+entry:
+ ret void ; empty kernel: no arguments, no inline asm, no other instructions
+}
+
+; Test early-clobber constraint
+
+; GCN-DEBUG-LABEL: test_early_clobber
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_early_clobber
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_early_clobber
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"() ; early-clobber ('&') output constrained to physical SGPR s10
+ store i32 %val, ptr addrspace(1) %out ; unlike the other tests here, the asm result is actually used by the store
+ ret void
+}
+
+; Test a physical register (s10) that is referenced only inside the asm text with no constraint; s11 is the sole declared output
+
+; GCN-DEBUG-LABEL: test_physreg_input
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_physreg_input
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_input
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"() ; s10 is hard-coded in the asm text and absent from the constraint string; only s11 is declared (as output); %val is never read
+ store i32 0, ptr addrspace(1) %out ; stores a constant, so the store does not depend on the asm result
+ ret void
+}
+
+; Test virtual and physical register overlap
+
+; GCN-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 18
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 18
+
+; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+
+; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+
+define amdgpu_kernel void @test_vreg_and_physreg_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"() ; two outputs constrained to physical SGPRs s10 and s11; %result is never read
+ %val1 = load i32, ptr addrspace(1) %in1 ; first ordinary IR value
+ %val2 = load i32, ptr addrspace(1) %in2 ; second ordinary IR value
+ %sum = add i32 %val1, %val2 ; combines both loads, independent of the asm outputs
+ store i32 %sum, ptr addrspace(1) %out
+ ret void
+}
+
+; Verify assembly output for GCN trackers
+; GCN-LABEL: test_single_physreg:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_single_physreg.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_multiple_physregs:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_multiple_physregs.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_physreg_with_vreg:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_no_inflation:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_endpgm
+; GCN: .set test_no_inflation.numbered_sgpr, 0
+; GCN: TotalNumSgprs: 0
+; GCN: NumVgprs: 0
+
+; GCN-LABEL: test_early_clobber:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_early_clobber.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_physreg_input:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_physreg_input.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s6
+; GCN-NEXT: s_mov_b32 s3, s7
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NEXT: s_mov_b32 s10, s6
+; GCN-NEXT: s_mov_b32 s11, s7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 14
+; GCN: NumVgprs: 2
+
+; Verify assembly output with GCN trackers but physical register tracking disabled (same as GCN)
+; GCN-NOPHYS-LABEL: test_single_physreg:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_single_physreg.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_multiple_physregs:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_multiple_physregs.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_physreg_with_vreg:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_physreg_with_vreg.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_no_inflation:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_no_inflation.numbered_sgpr, 0
+; GCN-NOPHYS: TotalNumSgprs: 0
+; GCN-NOPHYS: NumVgprs: 0
+
+; GCN-NOPHYS-LABEL: test_early_clobber:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_early_clobber.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_physreg_input:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_physreg_input.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: s_mov_b32 s0, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s1, s3
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s11, s7
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 14
+; GCN-NOPHYS: NumVgprs: 2
+
+; Verify assembly output without GCN trackers (should be identical)
+; NO-GCN-LABEL: test_single_physreg:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_single_physreg.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_multiple_physregs:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_multiple_physregs.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_physreg_with_vreg:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_mov_b32 s4, s2
+; NO-GCN-NEXT: s_mov_b32 s5, s3
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_no_inflation:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_no_inflation.numbered_sgpr, 0
+; NO-GCN: TotalNumSgprs: 0
+; NO-GCN: NumVgprs: 0
+
+; NO-GCN-LABEL: test_early_clobber:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_early_clobber.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_physreg_input:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_physreg_input.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_vreg_and_physreg_overlap:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: s_mov_b32 s0, s2
+; NO-GCN-NEXT: s_mov_b32 s1, s3
+; NO-GCN-NEXT: s_mov_b32 s2, s6
+; NO-GCN-NEXT: s_mov_b32 s3, s7
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; NO-GCN-NEXT: s_mov_b32 s10, s6
+; NO-GCN-NEXT: s_mov_b32 s11, s7
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 14
+; NO-GCN: NumVgprs: 2
>From 5788d0d279739dec459cd445c2ec5a83a3243048 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 3 Mar 2026 21:53:28 -0600
Subject: [PATCH 02/25] [AMDGPU] Used LiveRegUnits for physical register
pressure tracking.
Addressed review comments as well.
---
llvm/include/llvm/CodeGen/RegisterPressure.h | 14 -
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 215 ++++++------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 30 +-
.../machine-scheduler-sink-trivial-remats.mir | 10 +-
.../AMDGPU/materialize-frame-index-sgpr.ll | 10 +-
.../AMDGPU/schedule-amdgpu-tracker-physreg.ll | 9 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 310 +++++++++++-------
7 files changed, 333 insertions(+), 265 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index 01a944f386014..7485be6dcb351 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -293,20 +293,6 @@ class LiveRegSet {
}
public:
- LiveRegSet() = default;
-
- // Copy assignment operator - copies live register contents.
- // Note: Both LiveRegSets must have been initialized with init() first.
- LiveRegSet &operator=(const LiveRegSet &Other) {
- if (this != &Other) {
- NumRegUnits = Other.NumRegUnits;
- Regs.clear();
- for (const IndexMaskPair &Pair : Other.Regs)
- Regs.insert(Pair);
- }
- return *this;
- }
-
LLVM_ABI void clear();
LLVM_ABI void init(const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 3e619ef8ba8ba..d8616a7a5047f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,7 +14,9 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/Support/MathExtras.h"
@@ -48,36 +50,10 @@ unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
: (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
}
-void GCNRegPressure::inc(unsigned Reg,
- LaneBitmask PrevMask,
- LaneBitmask NewMask,
- const MachineRegisterInfo &MRI) {
- const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
- const TargetRegisterClass *RC;
- if (Register(Reg).isVirtual()) {
- RC = MRI.getRegClass(Reg);
- } else {
- if (!MRI.isAllocatable(Reg))
- return;
- RC = TRI->getMinimalPhysRegClass(Reg);
- if (!RC)
- return;
- }
-
- unsigned RegKind = getRegKind(RC, STI);
+void GCNRegPressure::inc(unsigned Reg, LaneBitmask PrevMask,
+ LaneBitmask NewMask, const MachineRegisterInfo &MRI) {
unsigned NewNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(NewMask);
unsigned PrevNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(PrevMask);
- // If multiple bits are set in the input masks for physical SGPRs, the
- // expected result does not match what getNumCoveredRegs returns. This is
- // because it returns the number of vector lanes, not the number of 32-bit
- // regs. Hence, cap to the register's actual size so e.g. a 32-bit SGPR counts
- // as 1 and VCC (64-bit) counts as 2, not 32.
- if (Register(Reg).isPhysical() && RegKind == SGPR) {
- unsigned MaxCovered = divideCeil(TRI->getRegSizeInBits(*RC), 32);
- NewNumCoveredRegs = std::min(NewNumCoveredRegs, MaxCovered);
- PrevNumCoveredRegs = std::min(PrevNumCoveredRegs, MaxCovered);
- }
if (NewNumCoveredRegs == PrevNumCoveredRegs)
return;
@@ -90,6 +66,10 @@ void GCNRegPressure::inc(unsigned Reg,
assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
"prev mask should always be lesser than new");
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ unsigned RegKind = getRegKind(RC, STI);
if (TRI->getRegSizeInBits(*RC) != 32) {
// Reg is from a tuple register class.
if (PrevMask.none()) {
@@ -119,6 +99,28 @@ void GCNRegPressure::inc(unsigned Reg,
Value[RegKind] += Sign;
}
+void GCNRegPressure::inc(MCRegister Reg, bool IsAdd,
+ const MachineRegisterInfo &MRI) {
+ if (!MRI.isAllocatable(Reg))
+ return;
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ if (!RC)
+ return;
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ unsigned RegKind = getRegKind(RC, STI);
+ unsigned NumRegs = divideCeil(TRI->getRegSizeInBits(*RC), 32);
+ int Sign = IsAdd ? 1 : -1;
+
+ if (TRI->getRegSizeInBits(*RC) != 32) {
+ unsigned TupleIdx = TOTAL_KINDS + RegKind;
+ Value[TupleIdx] += Sign * TRI->getRegClassWeight(RC).RegWeight;
+ Value[RegKind] += Sign * static_cast<int>(NumRegs);
+ } else {
+ Value[RegKind] += Sign;
+ }
+}
+
namespace {
struct RegExcess {
unsigned SGPR = 0;
@@ -495,72 +497,64 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
const LiveRange *LR = LIS.getCachedRegUnit(Unit);
- if (!LR)
- return false;
- return LR->liveAt(SI);
+ // If LIS has no reg-unit live range, be conservative and assume it is live.
+ return !LR || LR->liveAt(SI);
}
-bool GCNRPTracker::allRegUnitsLive(Register Reg) const {
+bool GCNRPTracker::allRegUnitsLive(MCRegister Reg) const {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const BitVector &Units = PhysLiveRegs.getBitVector();
return llvm::all_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
- return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any();
+ return Units.test(static_cast<unsigned>(Unit));
});
}
-bool GCNRPTracker::checkRegKilled(Register Reg, SlotIndex SI) const {
+bool GCNRPTracker::checkRegKilled(MCRegister Reg, SlotIndex SI) const {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const BitVector &Units = PhysLiveRegs.getBitVector();
return llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
- return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any() &&
- !isUnitLiveAt(Unit, SI);
+ return Units.test(static_cast<unsigned>(Unit)) && !isUnitLiveAt(Unit, SI);
});
}
-bool GCNRPTracker::eraseKilledUnits(Register Reg, SlotIndex SI) {
+bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- bool IsKilled = false;
+ BitVector KilledUnits(PhysLiveRegs.getBitVector().size(), false);
for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.any()) {
- if (!isUnitLiveAt(Unit, SI)) {
- IsKilled = true;
- PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ unsigned U = static_cast<unsigned>(Unit);
+ if (PhysLiveRegs.getBitVector().test(U) && !isUnitLiveAt(Unit, SI))
+ KilledUnits.set(U);
}
- return IsKilled;
+ if (KilledUnits.none())
+ return false;
+ PhysLiveRegs.removeUnits(KilledUnits);
+ return true;
}
-bool GCNRPTracker::eraseAllLiveUnits(Register Reg) {
+bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- bool WasLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.any()) {
- WasLive = true;
- PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ const BitVector &Units = PhysLiveRegs.getBitVector();
+ bool WasLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return Units.test(static_cast<unsigned>(Unit));
+ });
+ if (WasLive)
+ PhysLiveRegs.removeReg(Reg);
return WasLive;
}
-bool GCNRPTracker::insertAllNotLiveUnits(Register Reg) {
+bool GCNRPTracker::insertAllNotLiveUnits(MCRegister Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- bool WasNotLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.none()) {
- WasNotLive = true;
- PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ const BitVector &Units = PhysLiveRegs.getBitVector();
+ bool WasNotLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return !Units.test(static_cast<unsigned>(Unit));
+ });
+ if (WasNotLive)
+ PhysLiveRegs.addReg(Reg);
return WasNotLive;
}
@@ -618,7 +612,7 @@ void GCNRPTracker::reset(const MachineInstr &MI,
// Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
- PhysLiveRegs.init(*MRI);
+ PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
MaxPhysPressure.clear();
CurPhysPressure.clear();
}
@@ -635,7 +629,7 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
// Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
- PhysLiveRegs.init(*MRI);
+ PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
MaxPhysPressure.clear();
CurPhysPressure.clear();
}
@@ -721,30 +715,31 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (!MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was live before and erase them.
- bool WasLive = eraseAllLiveUnits(Reg);
+ // Check if any unit of this register was live before and if so,
+ // erase all of the regunits from PhysLiveRegs.
+ bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
- // Update pressure once per register if it was live.
+ // Update pressure once per register if any unit of this register was live
+ // before.
if (WasLive)
- CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
- *MRI);
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
// Make physical register uses alive (moving backward in upward tracking).
- for (const MachineOperand &MO : MI.uses()) {
+ for (const MachineOperand &MO : MI.all_uses()) {
if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
continue;
Register Reg = MO.getReg();
if (!MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was not live before and insert them.
- bool WasNotLive = insertAllNotLiveUnits(Reg);
+ // Check if any unit of this register was not live before and if so,
+ // insert all of the regunits into PhysLiveRegs.
+ bool WasNotLive = insertAllNotLiveUnits(Reg.asMCReg());
- // Update pressure once per register if it wasn't live before.
- if (WasNotLive) {
- CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
- *MRI);
- }
+ // Update pressure once per register if any unit of this register was not
+ // live before.
+ if (WasNotLive)
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
@@ -830,7 +825,8 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
// Track physical register kills (only if enabled).
if (TrackPhysRegs) {
- // Iterate over actual instruction operands to track which registers die.
+ // Iterate over actual instruction operands to track which regunits are
+ // killed.
SmallSet<Register, 8> SeenRegs;
for (const auto &MO : CurrMI->operands()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
@@ -840,12 +836,12 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
continue;
// Check if any unit of this register is killed and erase killed units.
- bool IsKilled = eraseKilledUnits(Reg, SI);
+ bool IsKilled = eraseKilledUnits(Reg.asMCReg(), SI);
- // Update pressure once per register if it was live and is now killed.
+ // Update pressure once per register if any unit of this register is
+ // killed.
if (IsKilled)
- CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
- *MRI);
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
}
@@ -886,21 +882,16 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
if (!Reg.isPhysical() || !MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was not live before.
- bool WasNotLive = false;
- for (MCRegUnit Unit : MRI->getTargetRegisterInfo()->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.none())
- WasNotLive = true;
- // Mark unit as live
- PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
+ // Check if any unit of this register was not live before and if so,
+ // insert all of the regunits into PhysLiveRegs (unless the def is dead).
+ bool WasNotLive = !allRegUnitsLive(Reg.asMCReg());
+ if (WasNotLive && !MO.isDead())
+ PhysLiveRegs.addReg(Reg);
- // Update pressure once per register if it wasn't live before.
- if (WasNotLive)
- CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
- *MRI);
+ // Update pressure once per register if any unit of this register was not
+ // live before and the def is not dead.
+ if (WasNotLive && !MO.isDead())
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
}
@@ -1033,17 +1024,17 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
continue;
// Check if any unit of this register is not currently live.
- bool WasNotLive = !allRegUnitsLive(Reg);
+ bool WasNotLive = !allRegUnitsLive(Reg.asMCReg());
- if (WasNotLive && !MO.isDead()) {
- TempPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
- *MRI);
- }
+ // Update pressure once per register if any unit of this register is not
+ // currently live and the def is not dead.
+ if (WasNotLive && !MO.isDead())
+ TempPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
// Process physical register uses to find kills.
SeenRegs.clear();
- for (const auto &MO : MI->uses()) {
+ for (const auto &MO : MI->all_uses()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
Register Reg = MO.getReg();
@@ -1052,12 +1043,12 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
continue;
// Check if any unit of this register is killed.
- bool IsKilled = checkRegKilled(Reg, SlotIdx);
+ bool IsKilled = checkRegKilled(Reg.asMCReg(), SlotIdx);
- if (IsKilled) {
- TempPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
- *MRI);
- }
+ // Update pressure once per register if any unit of this register is
+ // killed.
+ if (IsKilled)
+ TempPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index b3865fbade3ce..61d06a4f88cca 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -19,6 +19,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
#include <array>
@@ -130,6 +131,10 @@ struct GCNRegPressure {
LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
+ /// Update pressure for a physical register (add or remove). Used when
+ /// tracking physical registers.
+ void inc(MCRegister Reg, bool IsAdd, const MachineRegisterInfo &MRI);
+
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
unsigned DynamicVGPRBlockSize) const {
return getOccupancy(ST, DynamicVGPRBlockSize) >
@@ -327,8 +332,8 @@ class GCNRPTracker {
// Physical register tracking: Maintain clean separation between virtual and
// physical registers. Tracking physical registers can be turned OFF with an
- // option. Using llvm::LiveRegSet for consistency with the generic tracker.
- llvm::LiveRegSet PhysLiveRegs;
+ // option. Uses LiveRegUnits (bit vector of live register units).
+ LiveRegUnits PhysLiveRegs;
GCNRegPressure CurPhysPressure, MaxPhysPressure;
// Flag to control whether physical register tracking is active.
@@ -342,7 +347,7 @@ class GCNRPTracker {
: LIS(LIS), MRI(&MRI) {
setPhysRegTracking();
if (TrackPhysRegs)
- PhysLiveRegs.init(MRI);
+ PhysLiveRegs.init(*MRI.getTargetRegisterInfo());
}
// Copy constructor - PhysLiveRegs must be initialized then copied.
@@ -354,11 +359,10 @@ class GCNRPTracker {
MaxPhysPressure(Other.MaxPhysPressure),
TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
MRI(Other.MRI) {
- // Initialize PhysLiveRegs with proper universe, then copy contents.
- if (MRI) {
- PhysLiveRegs.init(*MRI);
- PhysLiveRegs =
- Other.PhysLiveRegs; // Use assignment operator to copy live regs.
+ if (TrackPhysRegs) {
+ assert(MRI && "MRI not initialized");
+ PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
+ PhysLiveRegs.addUnits(Other.PhysLiveRegs.getBitVector());
}
}
@@ -374,21 +378,21 @@ class GCNRPTracker {
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
// Check if all register units of Reg are currently live in PhysLiveRegs.
- bool allRegUnitsLive(Register Reg) const;
+ bool allRegUnitsLive(MCRegister Reg) const;
// Check if Reg has any killed units at the given slot index.
- bool checkRegKilled(Register Reg, SlotIndex SI) const;
+ bool checkRegKilled(MCRegister Reg, SlotIndex SI) const;
// Check if Reg has any killed units and erase them from PhysLiveRegs.
- bool eraseKilledUnits(Register Reg, SlotIndex SI);
+ bool eraseKilledUnits(MCRegister Reg, SlotIndex SI);
// Erase all live units of Reg from PhysLiveRegs.
// Returns true if any unit was live (and thus erased).
- bool eraseAllLiveUnits(Register Reg);
+ bool eraseAllLiveUnits(MCRegister Reg);
// Insert all not-live units of Reg into PhysLiveRegs.
// Returns true if any unit was not live (and thus inserted).
- bool insertAllNotLiveUnits(Register Reg);
+ bool insertAllNotLiveUnits(MCRegister Reg);
public:
// Enable physical register tracking only if both GCNTrackers and
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 059d930dc8e45..2d2ff828d3ef5 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -11507,19 +11507,19 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-GCNTRACKERS-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 810f478b3f12a..14bd5a9477acc 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -799,7 +799,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
-; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
@@ -1826,7 +1826,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
-; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
@@ -2916,12 +2916,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
-; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
-; GFX942-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
-; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
@@ -2929,7 +2927,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 0d81a11243ccf..252875377d1ea 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -19,13 +19,14 @@
; SCHED: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 97
;
; SCHED-GCNTRACKERS-LABEL: spill:%bb.0 entry
-; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 99
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 193
; SCHED-GCNTRACKERS: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 98
;
; NOTE: GCN Trackers now track pressure from both virtual and physical registers.
; The GCN tracker now matches the generic tracker's VGPR count (1 VGPR).
-; The SGPR pressure is still slightly higher (98 vs 97) due to summing physical
-; register pressure from inline asm constraints with virtual register pressure.
+; When a live range is not found for a physical regunit, we conservatively
+; assume the unit is live, so Region SGPR pressure can be higher (193 vs 98).
+; Pressure after scheduling remains 98 vs 97 due to physical register tracking.
define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 {
entry:
@@ -269,7 +270,7 @@ bb3:
; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 97
;
; SCHED-GCNTRACKERS-LABEL: spill_func:%bb.0 entry
-; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 192
define void @spill_func(ptr addrspace(1) %arg) #0 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 084acee121f78..fdb10483d9a42 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -9,77 +9,68 @@
; Test that GCN trackers correctly track physical register pressure from inline asm
; GCN-DEBUG-LABEL: test_single_physreg
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG-LABEL: test_single_physreg
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
; GCN-NOPHYS-DEBUG-LABEL: test_single_physreg
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
- store i32 0, ptr addrspace(1) %out
+ store i32 %val, ptr addrspace(1) %out
ret void
}
; Test multiple physical registers
; GCN-DEBUG-LABEL: test_multiple_physregs
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG-LABEL: test_multiple_physregs
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
; GCN-NOPHYS-DEBUG-LABEL: test_multiple_physregs
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
- store i32 0, ptr addrspace(1) %out
+ %r0 = extractvalue { i32, i32 } %result, 0
+ %r1 = extractvalue { i32, i32 } %result, 1
+ %sum = add i32 %r0, %r1
+ store i32 %sum, ptr addrspace(1) %out
ret void
}
; Test physical register with virtual registers
; GCN-DEBUG-LABEL: test_physreg_with_vreg
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
; GENERIC-DEBUG-LABEL: test_physreg_with_vreg
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
; GCN-NOPHYS-DEBUG-LABEL: test_physreg_with_vreg
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
%val = load i32, ptr addrspace(1) %in
- store i32 %val, ptr addrspace(1) %out
- ret void
-}
-
-; Test that we don't inflate pressure when not using GCN trackers
-
-; GCN-DEBUG-LABEL: test_no_inflation
-
-; GENERIC-DEBUG-LABEL: test_no_inflation
-
-; GCN-NOPHYS-DEBUG-LABEL: test_no_inflation
-
-define amdgpu_kernel void @test_no_inflation() {
-entry:
+ %sum = add i32 %asm_val, %val
+ store i32 %sum, ptr addrspace(1) %out
ret void
}
@@ -107,45 +98,95 @@ entry:
; Test physical register input
; GCN-DEBUG-LABEL: test_physreg_input
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG-LABEL: test_physreg_input
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
; GCN-NOPHYS-DEBUG-LABEL: test_physreg_input
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
entry:
%val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"()
- store i32 0, ptr addrspace(1) %out
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register pressure for tuple (64-bit) registers.
+; GCN tracker counts the 2 SGPRs.
+
+; GCN-DEBUG-LABEL: test_tuple_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+; GENERIC-DEBUG-LABEL: test_tuple_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_tuple_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_tuple_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i64 asm sideeffect "s_mov_b64 $0, 0", "={s[10:11]}"()
+ %lo = trunc i64 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register pressure for 128-bit tuple.
+; GCN tracker counts the 4 SGPRs.
+
+; GCN-DEBUG-LABEL: test_tuple128_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_tuple128_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+; GCN-NOPHYS-DEBUG-LABEL: test_tuple128_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+define amdgpu_kernel void @test_tuple128_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i128 asm sideeffect "s_mov_b64 $0, 0; s_mov_b64 $0+2, 0", "={s[8:11]}"()
+ %lo = trunc i128 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
ret void
}
; Test virtual and physical register overlap
-; GCN-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 18
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 18
+; GCN-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 16, LVGPR WT: 0, LSGPR WT: 16
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
-; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-define amdgpu_kernel void @test_vreg_and_physreg_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
+define amdgpu_kernel void @test_vreg_and_physreg_live_range_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
%val1 = load i32, ptr addrspace(1) %in1
%val2 = load i32, ptr addrspace(1) %in2
%sum = add i32 %val1, %val2
- store i32 %sum, ptr addrspace(1) %out
+ %r0 = extractvalue { i32, i32 } %result, 0
+ %r1 = extractvalue { i32, i32 } %result, 1
+ %with_asm = add i32 %sum, %r0
+ %final = add i32 %with_asm, %r1
+ store i32 %final, ptr addrspace(1) %out
ret void
}
@@ -155,10 +196,10 @@ entry:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: s_mov_b32 s10, 0
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
@@ -169,12 +210,13 @@ entry:
; GCN-LABEL: test_multiple_physregs:
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_add_i32 s4, s10, s11
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
@@ -197,19 +239,13 @@ entry:
; GCN-NEXT: s_mov_b32 s4, s2
; GCN-NEXT: s_mov_b32 s5, s3
; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT: s_endpgm
; GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 11
+; GCN: TotalNumSgprs: 13
; GCN: NumVgprs: 1
-; GCN-LABEL: test_no_inflation:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_endpgm
-; GCN: .set test_no_inflation.numbered_sgpr, 0
-; GCN: TotalNumSgprs: 0
-; GCN: NumVgprs: 0
-
; GCN-LABEL: test_early_clobber:
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
@@ -231,10 +267,10 @@ entry:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s11
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
@@ -242,10 +278,28 @@ entry:
; GCN: TotalNumSgprs: 12
; GCN: NumVgprs: 1
-; GCN-LABEL: test_vreg_and_physreg_overlap:
+; GCN-LABEL: test_tuple_physreg:
+; GCN: ;;#ASMSTART
+; GCN: s_mov_b64 s[10:11], 0
+; GCN: ;;#ASMEND
+; GCN: v_mov_b32_e32 v0, s10
+; GCN: .set test_tuple_physreg.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_tuple128_physreg:
+; GCN: ;;#ASMSTART
+; GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN: ;;#ASMEND
+; GCN: v_mov_b32_e32 v0, s8
+; GCN: .set test_tuple128_physreg.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: ;;#ASMSTART
@@ -260,14 +314,16 @@ entry:
; GCN-NEXT: s_mov_b32 s3, s7
; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NEXT: s_mov_b32 s10, s6
-; GCN-NEXT: s_mov_b32 s11, s7
+; GCN-NEXT: s_mov_b32 s14, s6
+; GCN-NEXT: s_mov_b32 s15, s7
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
; GCN-NEXT: s_endpgm
-; GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 14
+; GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
+; GCN: TotalNumSgprs: 18
; GCN: NumVgprs: 2
; Verify assembly output with GCN trackers but physical register tracking disabled (same as GCN)
@@ -276,10 +332,10 @@ entry:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
; GCN-NOPHYS-NEXT: ;;#ASMSTART
; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOPHYS-NEXT: s_endpgm
@@ -290,12 +346,13 @@ entry:
; GCN-NOPHYS-LABEL: test_multiple_physregs:
; GCN-NOPHYS-NEXT: ; %bb.0:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
; GCN-NOPHYS-NEXT: ;;#ASMSTART
; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_add_i32 s4, s10, s11
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOPHYS-NEXT: s_endpgm
@@ -318,19 +375,13 @@ entry:
; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NOPHYS-NEXT: s_endpgm
; GCN-NOPHYS: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: TotalNumSgprs: 13
; GCN-NOPHYS: NumVgprs: 1
-; GCN-NOPHYS-LABEL: test_no_inflation:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_no_inflation.numbered_sgpr, 0
-; GCN-NOPHYS: TotalNumSgprs: 0
-; GCN-NOPHYS: NumVgprs: 0
-
; GCN-NOPHYS-LABEL: test_early_clobber:
; GCN-NOPHYS-NEXT: ; %bb.0:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
@@ -352,10 +403,10 @@ entry:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
; GCN-NOPHYS-NEXT: ;;#ASMSTART
; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s11
; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOPHYS-NEXT: s_endpgm
@@ -363,10 +414,28 @@ entry:
; GCN-NOPHYS: TotalNumSgprs: 12
; GCN-NOPHYS: NumVgprs: 1
-; GCN-NOPHYS-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NOPHYS-LABEL: test_tuple_physreg:
+; GCN-NOPHYS: ;;#ASMSTART
+; GCN-NOPHYS: s_mov_b64 s[10:11], 0
+; GCN-NOPHYS: ;;#ASMEND
+; GCN-NOPHYS: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS: .set test_tuple_physreg.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_tuple128_physreg:
+; GCN-NOPHYS: ;;#ASMSTART
+; GCN-NOPHYS: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN-NOPHYS: ;;#ASMEND
+; GCN-NOPHYS: v_mov_b32_e32 v0, s8
+; GCN-NOPHYS: .set test_tuple128_physreg.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_vreg_and_physreg_live_range_overlap:
; GCN-NOPHYS-NEXT: ; %bb.0:
; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
; GCN-NOPHYS-NEXT: ;;#ASMSTART
@@ -381,14 +450,16 @@ entry:
; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, s6
-; GCN-NOPHYS-NEXT: s_mov_b32 s11, s7
+; GCN-NOPHYS-NEXT: s_mov_b32 s14, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s15, s7
; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[12:15], 0
; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 14
+; GCN-NOPHYS: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
+; GCN-NOPHYS: TotalNumSgprs: 18
; GCN-NOPHYS: NumVgprs: 2
; Verify assembly output without GCN trackers (should be identical)
@@ -397,10 +468,10 @@ entry:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
; NO-GCN-NEXT: ;;#ASMSTART
; NO-GCN-NEXT: s_mov_b32 s10, 0
; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NO-GCN-NEXT: s_endpgm
@@ -411,12 +482,13 @@ entry:
; NO-GCN-LABEL: test_multiple_physregs:
; NO-GCN-NEXT: ; %bb.0:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
; NO-GCN-NEXT: ;;#ASMSTART
; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_add_i32 s4, s10, s11
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s4
; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NO-GCN-NEXT: s_endpgm
@@ -439,19 +511,13 @@ entry:
; NO-GCN-NEXT: s_mov_b32 s4, s2
; NO-GCN-NEXT: s_mov_b32 s5, s3
; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
; NO-GCN-NEXT: s_endpgm
; NO-GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: TotalNumSgprs: 13
; NO-GCN: NumVgprs: 1
-; NO-GCN-LABEL: test_no_inflation:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_no_inflation.numbered_sgpr, 0
-; NO-GCN: TotalNumSgprs: 0
-; NO-GCN: NumVgprs: 0
-
; NO-GCN-LABEL: test_early_clobber:
; NO-GCN-NEXT: ; %bb.0:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
@@ -473,10 +539,10 @@ entry:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
; NO-GCN-NEXT: ;;#ASMSTART
; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s11
; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NO-GCN-NEXT: s_endpgm
@@ -484,10 +550,28 @@ entry:
; NO-GCN: TotalNumSgprs: 12
; NO-GCN: NumVgprs: 1
-; NO-GCN-LABEL: test_vreg_and_physreg_overlap:
+; NO-GCN-LABEL: test_tuple_physreg:
+; NO-GCN: ;;#ASMSTART
+; NO-GCN: s_mov_b64 s[10:11], 0
+; NO-GCN: ;;#ASMEND
+; NO-GCN: v_mov_b32_e32 v0, s10
+; NO-GCN: .set test_tuple_physreg.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_tuple128_physreg:
+; NO-GCN: ;;#ASMSTART
+; NO-GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; NO-GCN: ;;#ASMEND
+; NO-GCN: v_mov_b32_e32 v0, s8
+; NO-GCN: .set test_tuple128_physreg.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
; NO-GCN-NEXT: ; %bb.0:
; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; NO-GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; NO-GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
; NO-GCN-NEXT: s_mov_b32 s6, -1
; NO-GCN-NEXT: ;;#ASMSTART
@@ -502,12 +586,14 @@ entry:
; NO-GCN-NEXT: s_mov_b32 s3, s7
; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; NO-GCN-NEXT: s_mov_b32 s10, s6
-; NO-GCN-NEXT: s_mov_b32 s11, s7
+; NO-GCN-NEXT: s_mov_b32 s14, s6
+; NO-GCN-NEXT: s_mov_b32 s15, s7
; NO-GCN-NEXT: s_waitcnt vmcnt(0)
; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 14
+; NO-GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
+; NO-GCN: TotalNumSgprs: 18
; NO-GCN: NumVgprs: 2
>From 75a60fb82f9751a14f36bb8a2bbacb5c76917a26 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 24 Mar 2026 17:32:26 -0500
Subject: [PATCH 03/25] [AMDGPU] Addressed reviewer comments.
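Make the Value[RegKind] update in GCNRegPressure::inc() unconditional to match existing code.
Un-invert the logic and naming for regunit management (insertAllNotLiveUnits -> insertIfNotLive, WasNotLive -> NewlyLive).
Use the auto-gen script (utils/update_llc_test_checks.py) for the new test while maintaining debug messages.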
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 22 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 6 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 788 +++++++++---------
3 files changed, 386 insertions(+), 430 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index d8616a7a5047f..2fc2fc98d1631 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -115,10 +115,8 @@ void GCNRegPressure::inc(MCRegister Reg, bool IsAdd,
if (TRI->getRegSizeInBits(*RC) != 32) {
unsigned TupleIdx = TOTAL_KINDS + RegKind;
Value[TupleIdx] += Sign * TRI->getRegClassWeight(RC).RegWeight;
- Value[RegKind] += Sign * static_cast<int>(NumRegs);
- } else {
- Value[RegKind] += Sign;
}
+ Value[RegKind] += Sign * static_cast<int>(NumRegs);
}
namespace {
@@ -546,16 +544,16 @@ bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
return WasLive;
}
-bool GCNRPTracker::insertAllNotLiveUnits(MCRegister Reg) {
+bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
const BitVector &Units = PhysLiveRegs.getBitVector();
- bool WasNotLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ bool NewlyLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
return !Units.test(static_cast<unsigned>(Unit));
});
- if (WasNotLive)
+ if (NewlyLive)
PhysLiveRegs.addReg(Reg);
- return WasNotLive;
+ return NewlyLive;
}
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
@@ -732,13 +730,11 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
Register Reg = MO.getReg();
if (!MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was not live before and if so,
- // insert all of the regunits into PhysLiveRegs.
- bool WasNotLive = insertAllNotLiveUnits(Reg.asMCReg());
+ // Insert regunits into PhysLiveRegs if not already live.
+ bool NewlyLive = insertIfNotLive(Reg.asMCReg());
- // Update pressure once per register if any unit of this register was not
- // live before.
- if (WasNotLive)
+ // Update pressure once per register if it became newly live.
+ if (NewlyLive)
CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 61d06a4f88cca..19fbcbb935766 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -390,9 +390,9 @@ class GCNRPTracker {
// Returns true if any unit was live (and thus erased).
bool eraseAllLiveUnits(MCRegister Reg);
- // Insert all not-live units of Reg into PhysLiveRegs.
- // Returns true if any unit was not live (and thus inserted).
- bool insertAllNotLiveUnits(MCRegister Reg);
+ // Insert units of Reg into PhysLiveRegs if not already live.
+ // Returns true if any unit was newly inserted.
+ bool insertIfNotLive(MCRegister Reg);
public:
// Enable physical register tracking only if both GCNTrackers and
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index fdb10483d9a42..4ead3098a7f68 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -1,9 +1,10 @@
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 < %s | FileCheck --check-prefix=GCN-NOPHYS %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN %s
+; RUN: FileCheck --check-prefix=GCN-DEBUG %s < %t
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=NO-GCN %s
+; RUN: FileCheck --check-prefix=GENERIC-DEBUG %s < %t
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN-NOPHYS %s
+; RUN: FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s < %t
; REQUIRES: asserts
; Test that GCN trackers correctly track physical register pressure from inline asm
@@ -21,6 +22,44 @@
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
+; GCN-LABEL: test_single_physreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_single_physreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_single_physreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
store i32 %val, ptr addrspace(1) %out
@@ -42,6 +81,47 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
+; GCN-LABEL: test_multiple_physregs:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_add_i32 s4, s10, s11
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_multiple_physregs:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_add_i32 s4, s10, s11
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s4
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_multiple_physregs:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_add_i32 s4, s10, s11
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
%r0 = extractvalue { i32, i32 } %result, 0
@@ -66,6 +146,62 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+; GCN-LABEL: test_physreg_with_vreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_physreg_with_vreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_mov_b32 s4, s2
+; NO-GCN-NEXT: s_mov_b32 s5, s3
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_physreg_with_vreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
%val = load i32, ptr addrspace(1) %in
@@ -89,6 +225,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) {
+; GCN-LABEL: test_early_clobber:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_early_clobber:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_early_clobber:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"()
store i32 %val, ptr addrspace(1) %out
@@ -110,6 +284,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
+; GCN-LABEL: test_physreg_input:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_physreg_input:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s11
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_physreg_input:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"()
store i32 %val, ptr addrspace(1) %out
@@ -132,6 +344,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_tuple_physreg(ptr addrspace(1) %out) {
+; GCN-LABEL: test_tuple_physreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b64 s[10:11], 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_tuple_physreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b64 s[10:11], 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_tuple_physreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b64 s[10:11], 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i64 asm sideeffect "s_mov_b64 $0, 0", "={s[10:11]}"()
%lo = trunc i64 %val to i32
@@ -155,6 +405,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
define amdgpu_kernel void @test_tuple128_physreg(ptr addrspace(1) %out) {
+; GCN-LABEL: test_tuple128_physreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_tuple128_physreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s8
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_tuple128_physreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i128 asm sideeffect "s_mov_b64 $0, 0; s_mov_b64 $0+2, 0", "={s[8:11]}"()
%lo = trunc i128 %val to i32
@@ -177,6 +465,86 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
define amdgpu_kernel void @test_vreg_and_physreg_live_range_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s6
+; GCN-NEXT: s_mov_b32 s3, s7
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NEXT: s_mov_b32 s14, s6
+; GCN-NEXT: s_mov_b32 s15, s7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: s_mov_b32 s0, s2
+; NO-GCN-NEXT: s_mov_b32 s1, s3
+; NO-GCN-NEXT: s_mov_b32 s2, s6
+; NO-GCN-NEXT: s_mov_b32 s3, s7
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; NO-GCN-NEXT: s_mov_b32 s14, s6
+; NO-GCN-NEXT: s_mov_b32 s15, s7
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_vreg_and_physreg_live_range_overlap:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: s_mov_b32 s0, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s1, s3
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s14, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s15, s7
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[12:15], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
%val1 = load i32, ptr addrspace(1) %in1
@@ -189,411 +557,3 @@ entry:
store i32 %final, ptr addrspace(1) %out
ret void
}
-
-; Verify assembly output for GCN trackers
-; GCN-LABEL: test_single_physreg:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_single_physreg.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 11
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_multiple_physregs:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_add_i32 s4, s10, s11
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_multiple_physregs.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_physreg_with_vreg:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NEXT: s_mov_b32 s6, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s4, s0
-; GCN-NEXT: s_mov_b32 s5, s1
-; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NEXT: s_mov_b32 s4, s2
-; GCN-NEXT: s_mov_b32 s5, s3
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 13
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_early_clobber:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_early_clobber.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 11
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_physreg_input:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_mov_b32_e32 v0, s11
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_physreg_input.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_tuple_physreg:
-; GCN: ;;#ASMSTART
-; GCN: s_mov_b64 s[10:11], 0
-; GCN: ;;#ASMEND
-; GCN: v_mov_b32_e32 v0, s10
-; GCN: .set test_tuple_physreg.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_tuple128_physreg:
-; GCN: ;;#ASMSTART
-; GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
-; GCN: ;;#ASMEND
-; GCN: v_mov_b32_e32 v0, s8
-; GCN: .set test_tuple128_physreg.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
-; GCN-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NEXT: s_mov_b32 s6, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s4, s0
-; GCN-NEXT: s_mov_b32 s5, s1
-; GCN-NEXT: s_mov_b32 s0, s2
-; GCN-NEXT: s_mov_b32 s1, s3
-; GCN-NEXT: s_mov_b32 s2, s6
-; GCN-NEXT: s_mov_b32 s3, s7
-; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NEXT: s_mov_b32 s14, s6
-; GCN-NEXT: s_mov_b32 s15, s7
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
-; GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
-; GCN: TotalNumSgprs: 18
-; GCN: NumVgprs: 2
-
-; Verify assembly output with GCN trackers but physical register tracking disabled (same as GCN)
-; GCN-NOPHYS-LABEL: test_single_physreg:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_single_physreg.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 11
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_multiple_physregs:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: s_add_i32 s4, s10, s11
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_multiple_physregs.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_physreg_with_vreg:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
-; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
-; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
-; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
-; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 13
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_early_clobber:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_early_clobber.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 11
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_physreg_input:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s11
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_physreg_input.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_tuple_physreg:
-; GCN-NOPHYS: ;;#ASMSTART
-; GCN-NOPHYS: s_mov_b64 s[10:11], 0
-; GCN-NOPHYS: ;;#ASMEND
-; GCN-NOPHYS: v_mov_b32_e32 v0, s10
-; GCN-NOPHYS: .set test_tuple_physreg.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_tuple128_physreg:
-; GCN-NOPHYS: ;;#ASMSTART
-; GCN-NOPHYS: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
-; GCN-NOPHYS: ;;#ASMEND
-; GCN-NOPHYS: v_mov_b32_e32 v0, s8
-; GCN-NOPHYS: .set test_tuple128_physreg.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_vreg_and_physreg_live_range_overlap:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
-; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
-; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
-; GCN-NOPHYS-NEXT: s_mov_b32 s0, s2
-; GCN-NOPHYS-NEXT: s_mov_b32 s1, s3
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, s6
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
-; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_mov_b32 s14, s6
-; GCN-NOPHYS-NEXT: s_mov_b32 s15, s7
-; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s11, v0
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[12:15], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
-; GCN-NOPHYS: TotalNumSgprs: 18
-; GCN-NOPHYS: NumVgprs: 2
-
-; Verify assembly output without GCN trackers (should be identical)
-; NO-GCN-LABEL: test_single_physreg:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_single_physreg.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 11
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_multiple_physregs:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: s_add_i32 s4, s10, s11
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s4
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_multiple_physregs.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_physreg_with_vreg:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s6, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: s_mov_b32 s4, s0
-; NO-GCN-NEXT: s_mov_b32 s5, s1
-; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; NO-GCN-NEXT: s_mov_b32 s4, s2
-; NO-GCN-NEXT: s_mov_b32 s5, s3
-; NO-GCN-NEXT: s_waitcnt vmcnt(0)
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 13
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_early_clobber:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_early_clobber.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 11
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_physreg_input:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s11
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_physreg_input.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_tuple_physreg:
-; NO-GCN: ;;#ASMSTART
-; NO-GCN: s_mov_b64 s[10:11], 0
-; NO-GCN: ;;#ASMEND
-; NO-GCN: v_mov_b32_e32 v0, s10
-; NO-GCN: .set test_tuple_physreg.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_tuple128_physreg:
-; NO-GCN: ;;#ASMSTART
-; NO-GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
-; NO-GCN: ;;#ASMEND
-; NO-GCN: v_mov_b32_e32 v0, s8
-; NO-GCN: .set test_tuple128_physreg.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; NO-GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
-; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s6, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: s_mov_b32 s4, s0
-; NO-GCN-NEXT: s_mov_b32 s5, s1
-; NO-GCN-NEXT: s_mov_b32 s0, s2
-; NO-GCN-NEXT: s_mov_b32 s1, s3
-; NO-GCN-NEXT: s_mov_b32 s2, s6
-; NO-GCN-NEXT: s_mov_b32 s3, s7
-; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; NO-GCN-NEXT: s_mov_b32 s14, s6
-; NO-GCN-NEXT: s_mov_b32 s15, s7
-; NO-GCN-NEXT: s_waitcnt vmcnt(0)
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
-; NO-GCN: TotalNumSgprs: 18
-; NO-GCN: NumVgprs: 2
>From 061011cf01476ad5da108b82268cb803d0931af3 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Wed, 25 Mar 2026 00:44:04 -0500
Subject: [PATCH 04/25] Added test for early clobber with a tuple register.
---
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 4ead3098a7f68..7927712f36b5c 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -269,6 +269,77 @@ entry:
ret void
}
+; Test early-clobber constraint with a tuple (64-bit) register.
+; The input s12 and early-clobber output s[10:11] are distinct registers; the early-clobber output is live alongside the input.
+
+; GCN-DEBUG-LABEL: test_early_clobber_tuple
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 8
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 8
+
+; GENERIC-DEBUG-LABEL: test_early_clobber_tuple
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_early_clobber_tuple
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_early_clobber_tuple(ptr addrspace(1) %out) {
+; GCN-LABEL: test_early_clobber_tuple:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s12, 42
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b64 s[10:11], s12
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_early_clobber_tuple:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s12, 42
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b64 s[10:11], s12
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_early_clobber_tuple:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s12, 42
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b64 s[10:11], s12
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+entry:
+ %in = call i32 asm sideeffect "s_mov_b32 $0, 42", "={s12}"()
+ %val = call i64 asm sideeffect "s_mov_b64 $0, $1", "=&{s[10:11]},{s12}"(i32 %in)
+ %lo = trunc i64 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
+ ret void
+}
+
; Test physical register input
; GCN-DEBUG-LABEL: test_physreg_input
>From c897e7702cc3a7b0c33e3658fa4e4be40e43c219 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 30 Mar 2026 17:50:04 -0500
Subject: [PATCH 05/25] Folded each pair of llc RUN lines into one llc
invocation while maintaining debug message checks.
---
.../CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 252875377d1ea..98e04ea74a993 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 < %s | FileCheck --check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN-GCNTRACKERS %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN %s
+; RUN: FileCheck --check-prefix=SCHED %s < %t
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN-GCNTRACKERS %s
+; RUN: FileCheck --check-prefix=SCHED-GCNTRACKERS %s < %t
; REQUIRES: asserts
; CHECK-LABEL: {{^}}spill:
; GCN: NumSgprs: 104
>From 670ea40fbdd6b4e2d58453fd2692e19202231b47 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 30 Mar 2026 19:35:09 -0500
Subject: [PATCH 06/25] Added early clobber support to physical register
tracking.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 2fc2fc98d1631..7c18b0f806ade 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -705,6 +705,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// Track physical register defs and uses (only if enabled).
if (TrackPhysRegs) {
+ GCNRegPressure ECPhysDefPressure;
+ bool HasECPhysDefs = false;
+
// Kill physical register defs (moving backward in upward tracking).
for (const MachineOperand &MO : MI.all_defs()) {
if (!MO.getReg().isPhysical())
@@ -713,6 +716,11 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (!MRI->isAllocatable(Reg))
continue;
+ if (MO.isEarlyClobber()) {
+ ECPhysDefPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ HasECPhysDefs = true;
+ }
+
// Check if any unit of this register was live before and if so,
// erase all of the regunits from PhysLiveRegs.
bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
@@ -738,7 +746,10 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
- MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
+ // Early-clobber physical defs are live alongside uses.
+ MaxPhysPressure = HasECPhysDefs ? max(CurPhysPressure + ECPhysDefPressure,
+ MaxPhysPressure)
+ : max(CurPhysPressure, MaxPhysPressure);
}
assert(CurPressure == getVirtRegPressure(*MRI, VirtLiveRegs));
>From 7ad2c298809010c55776acfb94a18a191ea7a7ce Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 31 Mar 2026 04:21:00 -0500
Subject: [PATCH 07/25] Added a new test for physical register early clobber.
---
.../regpressure-physreg-early-clobber.mir | 43 +++++++++++++++++++
1 file changed, 43 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir
new file mode 100644
index 0000000000000..516db40a5af76
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir
@@ -0,0 +1,43 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 %s -filetype=null 2>&1 | FileCheck %s --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 %s -filetype=null 2>&1 | FileCheck %s --check-prefix=RPU-NOPHYS
+
+# Test that the upward register pressure tracker accounts for early-clobber
+# physical register defs overlapping with physical register uses.
+# With physreg tracking, the EC def s[10:11] (2 SGPRs) overlaps with the use
+# s12 (1 SGPR), producing higher max pressure at the INLINEASM instruction.
+
+---
+name: ec_physreg
+tracksRegLiveness: true
+machineFunctionInfo:
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; RPU-LABEL: name: ec_physreg
+ ; RPU: bb.0:
+ ; RPU: SGPR VGPR
+ ; RPU: 3 1
+ ; RPU: 5 1 INLINEASM &"s_mov_b64 $0, $1" {{.*}} early-clobber $sgpr10_sgpr11
+ ; RPU: 4 1
+ ;
+ ; RPU-NOPHYS-LABEL: name: ec_physreg
+ ; RPU-NOPHYS: bb.0:
+ ; RPU-NOPHYS: SGPR VGPR
+ ; RPU-NOPHYS: 2 1
+ ; RPU-NOPHYS: 2 1 INLINEASM &"s_mov_b64 $0, $1" {{.*}} early-clobber $sgpr10_sgpr11
+ ; RPU-NOPHYS: 2 1
+ bb.0:
+ liveins: $sgpr8_sgpr9
+
+ %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ INLINEASM &"s_mov_b32 $0, 42", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr12
+ %3:sreg_32 = COPY $sgpr12
+ $sgpr12 = COPY %3
+ INLINEASM &"s_mov_b64 $0, $1", 1 /* sideeffect attdialect */, 11 /* regdef-ec */, implicit-def early-clobber $sgpr10_sgpr11, 9 /* reguse */, $sgpr12
+ %4:sreg_64 = COPY $sgpr10_sgpr11
+ %5:sreg_32 = COPY %4.sub0
+ %6:vgpr_32 = COPY %5
+ GLOBAL_STORE_DWORD_SADDR %2, %6, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_ENDPGM 0
+...
>From 502fb6866c23dc3ae09a4e8d0f499e7a6ae11eff Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 31 Mar 2026 17:14:32 -0500
Subject: [PATCH 08/25] Added an SIRegisterInfo member to GCNRPTracker and its
uses.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 34 +++++++++++------------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 12 ++++----
2 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 7c18b0f806ade..38fe20dd90955 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -500,28 +500,25 @@ bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
}
bool GCNRPTracker::allRegUnitsLive(MCRegister Reg) const {
- assert(MRI && "MRI not initialized");
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ assert(SRI && "SRI not initialized");
const BitVector &Units = PhysLiveRegs.getBitVector();
- return llvm::all_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return llvm::all_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
return Units.test(static_cast<unsigned>(Unit));
});
}
bool GCNRPTracker::checkRegKilled(MCRegister Reg, SlotIndex SI) const {
- assert(MRI && "MRI not initialized");
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ assert(SRI && "SRI not initialized");
const BitVector &Units = PhysLiveRegs.getBitVector();
- return llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
return Units.test(static_cast<unsigned>(Unit)) && !isUnitLiveAt(Unit, SI);
});
}
bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
- assert(MRI && "MRI not initialized");
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ assert(SRI && "SRI not initialized");
BitVector KilledUnits(PhysLiveRegs.getBitVector().size(), false);
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ for (MCRegUnit Unit : SRI->regunits(Reg)) {
unsigned U = static_cast<unsigned>(Unit);
if (PhysLiveRegs.getBitVector().test(U) && !isUnitLiveAt(Unit, SI))
KilledUnits.set(U);
@@ -533,10 +530,9 @@ bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
}
bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
- assert(MRI && "MRI not initialized");
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ assert(SRI && "SRI not initialized");
const BitVector &Units = PhysLiveRegs.getBitVector();
- bool WasLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ bool WasLive = llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
return Units.test(static_cast<unsigned>(Unit));
});
if (WasLive)
@@ -545,10 +541,9 @@ bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
}
bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
- assert(MRI && "MRI not initialized");
- const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ assert(SRI && "SRI not initialized");
const BitVector &Units = PhysLiveRegs.getBitVector();
- bool NewlyLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ bool NewlyLive = llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
return !Units.test(static_cast<unsigned>(Unit));
});
if (NewlyLive)
@@ -596,6 +591,7 @@ void GCNRPTracker::reset(const MachineInstr &MI,
const LiveRegSet *VirtLiveRegsCopy, bool After) {
const MachineFunction &MF = *MI.getMF();
MRI = &MF.getRegInfo();
+ SRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
if (VirtLiveRegsCopy) {
if (&VirtLiveRegs != VirtLiveRegsCopy)
VirtLiveRegs = *VirtLiveRegsCopy;
@@ -610,7 +606,7 @@ void GCNRPTracker::reset(const MachineInstr &MI,
// Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
- PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
+ PhysLiveRegs.init(*SRI);
MaxPhysPressure.clear();
CurPhysPressure.clear();
}
@@ -619,6 +615,7 @@ void GCNRPTracker::reset(const MachineInstr &MI,
void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
const LiveRegSet &VirtLiveRegsSet) {
MRI = &MRInfo;
+ SRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
VirtLiveRegs = VirtLiveRegsSet;
LastTrackedMI = nullptr;
MaxPressure = CurPressure = getVirtRegPressure(MRInfo, VirtLiveRegsSet);
@@ -627,7 +624,7 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
// Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
- PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
+ PhysLiveRegs.init(*SRI);
MaxPhysPressure.clear();
CurPhysPressure.clear();
}
@@ -761,6 +758,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
const LiveRegSet *VirtLiveRegsCopy) {
MRI = &MI.getMF()->getRegInfo();
+ SRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
LastTrackedMI = nullptr;
MBBEnd = MI.getParent()->end();
NextMI = &MI;
@@ -1072,7 +1070,7 @@ bool GCNUpwardRPTracker::isValid() const {
dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
" LIS reported livesets mismatch:\n"
<< print(LISLR, *MRI);
- reportMismatch(LISLR, TrackedLR, MRI->getTargetRegisterInfo());
+ reportMismatch(LISLR, TrackedLR, SRI);
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 19fbcbb935766..c08f43f870b59 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -342,12 +342,14 @@ class GCNRPTracker {
const MachineInstr *LastTrackedMI = nullptr;
mutable const MachineRegisterInfo *MRI = nullptr;
+ const SIRegisterInfo *SRI = nullptr;
GCNRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
- : LIS(LIS), MRI(&MRI) {
+ : LIS(LIS), MRI(&MRI),
+ SRI(static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo())) {
setPhysRegTracking();
if (TrackPhysRegs)
- PhysLiveRegs.init(*MRI.getTargetRegisterInfo());
+ PhysLiveRegs.init(*SRI);
}
// Copy constructor - PhysLiveRegs must be initialized then copied.
@@ -358,10 +360,10 @@ class GCNRPTracker {
CurPhysPressure(Other.CurPhysPressure),
MaxPhysPressure(Other.MaxPhysPressure),
TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
- MRI(Other.MRI) {
+ MRI(Other.MRI), SRI(Other.SRI) {
if (TrackPhysRegs) {
- assert(MRI && "MRI not initialized");
- PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
+ assert(SRI && "SRI not initialized");
+ PhysLiveRegs.init(*SRI);
PhysLiveRegs.addUnits(Other.PhysLiveRegs.getBitVector());
}
}
>From 357ace7dc88476ced9d46eb37b7f6a0eea36d5e9 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 31 Mar 2026 18:25:55 -0500
Subject: [PATCH 09/25] Removed invert: replaced !allRegUnitsLive with isAnyRegUnitNotLive.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 10 +++++-----
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 38fe20dd90955..88a7ee386f72d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -499,11 +499,11 @@ bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
return !LR || LR->liveAt(SI);
}
-bool GCNRPTracker::allRegUnitsLive(MCRegister Reg) const {
+bool GCNRPTracker::isAnyRegUnitNotLive(MCRegister Reg) const {
assert(SRI && "SRI not initialized");
const BitVector &Units = PhysLiveRegs.getBitVector();
- return llvm::all_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
- return Units.test(static_cast<unsigned>(Unit));
+ return llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return !Units.test(static_cast<unsigned>(Unit));
});
}
@@ -889,7 +889,7 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
// Check if any unit of this register is not live before and if so,
// insert all of the regunits into PhysLiveRegs.
- bool WasNotLive = !allRegUnitsLive(Reg.asMCReg());
+ bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
if (WasNotLive && !MO.isDead())
PhysLiveRegs.addReg(Reg);
@@ -1029,7 +1029,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
continue;
// Check if any unit of this register is not currently live.
- bool WasNotLive = !allRegUnitsLive(Reg.asMCReg());
+ bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
// Update pressure once per register if any unit of this register is not
// live before.
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index c08f43f870b59..d344d50c8749d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -379,8 +379,8 @@ class GCNRPTracker {
// Helper to check if a register unit is live at a given slot index.
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
- // Check if all register units of Reg are currently live in PhysLiveRegs.
- bool allRegUnitsLive(MCRegister Reg) const;
+ // Check if any register unit of Reg is not currently live in PhysLiveRegs.
+ bool isAnyRegUnitNotLive(MCRegister Reg) const;
// Check if Reg has any killed units at the given slot index.
bool checkRegKilled(MCRegister Reg, SlotIndex SI) const;
>From 1e6b5b70df98ee929a19c74f2bdc50fc96431791 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Thu, 23 Apr 2026 12:26:50 -0500
Subject: [PATCH 10/25] Unified GCNRegPressure objects for virtual and
physical.
Augmented physical live register representation.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 280 +++++++-----------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 88 +++---
.../machine-scheduler-sink-trivial-remats.mir | 8 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 6 +-
4 files changed, 165 insertions(+), 217 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 88a7ee386f72d..a2ca0677986ca 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -517,6 +517,11 @@ bool GCNRPTracker::checkRegKilled(MCRegister Reg, SlotIndex SI) const {
bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
assert(SRI && "SRI not initialized");
+ // Due to aliasing, a physical register may not be present in
+ // PhysLiveRegs.Regs, but one of its regunits may show up as killed. Return
+ // early in this case.
+ if (!PhysLiveRegs.Regs.contains(Reg))
+ return false;
BitVector KilledUnits(PhysLiveRegs.getBitVector().size(), false);
for (MCRegUnit Unit : SRI->regunits(Reg)) {
unsigned U = static_cast<unsigned>(Unit);
@@ -525,19 +530,16 @@ bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
}
if (KilledUnits.none())
return false;
- PhysLiveRegs.removeUnits(KilledUnits);
+ PhysLiveRegs.remove(KilledUnits, Reg);
return true;
}
bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
assert(SRI && "SRI not initialized");
- const BitVector &Units = PhysLiveRegs.getBitVector();
- bool WasLive = llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
- return Units.test(static_cast<unsigned>(Unit));
- });
- if (WasLive)
- PhysLiveRegs.removeReg(Reg);
- return WasLive;
+ if (!PhysLiveRegs.Regs.contains(Reg))
+ return false;
+ PhysLiveRegs.remove(Reg);
+ return true;
}
bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
@@ -547,10 +549,17 @@ bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
return !Units.test(static_cast<unsigned>(Unit));
});
if (NewlyLive)
- PhysLiveRegs.addReg(Reg);
+ PhysLiveRegs.add(Reg);
return NewlyLive;
}
+GCNRegPressure GCNRPTracker::constructPhysRegPressure() const {
+ GCNRegPressure Res;
+ for (MCRegister Reg : PhysLiveRegs.Regs)
+ Res.inc(Reg, /*IsAdd=*/true, *MRI);
+ return Res;
+}
+
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter) {
@@ -603,12 +612,9 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getVirtRegPressure(*MRI, VirtLiveRegs);
setPhysRegTracking();
- // Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
PhysLiveRegs.init(*SRI);
- MaxPhysPressure.clear();
- CurPhysPressure.clear();
}
}
@@ -621,12 +627,9 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
MaxPressure = CurPressure = getVirtRegPressure(MRInfo, VirtLiveRegsSet);
setPhysRegTracking();
- // Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
PhysLiveRegs.init(*SRI);
- MaxPhysPressure.clear();
- CurPhysPressure.clear();
}
}
@@ -655,29 +658,37 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
GCNRegPressure DefPressure, ECDefPressure;
bool HasECDefs = false;
for (const MachineOperand &MO : MI.all_defs()) {
- if (!MO.getReg().isVirtual())
- continue;
-
Register Reg = MO.getReg();
- LaneBitmask DefMask = getDefRegMask(MO, *MRI);
- // Treat a def as fully live at the moment of definition: keep a record.
- if (MO.isEarlyClobber()) {
- ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
- HasECDefs = true;
- } else
- DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+ if (Reg.isVirtual()) {
+ LaneBitmask DefMask = getDefRegMask(MO, *MRI);
- auto I = VirtLiveRegs.find(Reg);
- if (I == VirtLiveRegs.end())
- continue;
+ if (MO.isEarlyClobber()) {
+ ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+ HasECDefs = true;
+ } else
+ DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
- LaneBitmask &LiveMask = I->second;
- LaneBitmask PrevMask = LiveMask;
- LiveMask &= ~DefMask;
- CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
- if (LiveMask.none())
- VirtLiveRegs.erase(I);
+ auto I = VirtLiveRegs.find(Reg);
+ if (I == VirtLiveRegs.end())
+ continue;
+
+ LaneBitmask &LiveMask = I->second;
+ LaneBitmask PrevMask = LiveMask;
+ LiveMask &= ~DefMask;
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ if (LiveMask.none())
+ VirtLiveRegs.erase(I);
+ } else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
+ if (MO.isEarlyClobber()) {
+ ECDefPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ HasECDefs = true;
+ }
+
+ bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
+ if (WasLive)
+ CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
+ }
}
// Update MaxPressure with defs pressure.
@@ -696,60 +707,27 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
CurPressure.inc(U.VRegOrUnit.asVirtualReg(), PrevMask, LiveMask, *MRI);
}
- // Update MaxPressure with uses plus early-clobber defs pressure.
- MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
- : max(CurPressure, MaxPressure);
-
- // Track physical register defs and uses (only if enabled).
if (TrackPhysRegs) {
- GCNRegPressure ECPhysDefPressure;
- bool HasECPhysDefs = false;
-
- // Kill physical register defs (moving backward in upward tracking).
- for (const MachineOperand &MO : MI.all_defs()) {
- if (!MO.getReg().isPhysical())
- continue;
- Register Reg = MO.getReg();
- if (!MRI->isAllocatable(Reg))
- continue;
-
- if (MO.isEarlyClobber()) {
- ECPhysDefPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
- HasECPhysDefs = true;
- }
-
- // Check if any unit of this register was live before and if so,
- // erase all of the regunits from PhysLiveRegs.
- bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
-
- // Update pressure once per register if any unit of this register was live
- // before.
- if (WasLive)
- CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
- }
-
- // Make physical register uses alive (moving backward in upward tracking).
+ // Physical register handling needs the register directly to avoid aliasing,
+ // so we need to iterate over all uses.
for (const MachineOperand &MO : MI.all_uses()) {
if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
continue;
Register Reg = MO.getReg();
if (!MRI->isAllocatable(Reg))
continue;
- // Insert regunits into PhysLiveRegs if not already live.
bool NewlyLive = insertIfNotLive(Reg.asMCReg());
-
- // Update pressure once per register if it became newly live.
if (NewlyLive)
- CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
-
- // Early-clobber physical defs are live alongside uses.
- MaxPhysPressure = HasECPhysDefs ? max(CurPhysPressure + ECPhysDefPressure,
- MaxPhysPressure)
- : max(CurPhysPressure, MaxPhysPressure);
}
- assert(CurPressure == getVirtRegPressure(*MRI, VirtLiveRegs));
+ // Update MaxPressure with uses plus early-clobber defs pressure.
+ MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
+ : max(CurPressure, MaxPressure);
+
+ assert(CurPressure ==
+ getVirtRegPressure(*MRI, VirtLiveRegs) + constructPhysRegPressure());
}
////////////////////////////////////////////////////////////////////////////////
@@ -793,65 +771,51 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
// Remove dead registers or mask bits.
SmallSet<Register, 8> SeenRegs;
- for (auto &MO : CurrMI->operands()) {
- if (!MO.isReg() || !MO.getReg().isVirtual())
- continue;
- if (MO.isUse() && !MO.readsReg())
- continue;
- if (!UseInternalIterator && MO.isDef())
+ for (const auto &MO : CurrMI->operands()) {
+ if (!MO.isReg())
continue;
- if (!SeenRegs.insert(MO.getReg()).second)
- continue;
- const LiveInterval &LI = LIS.getInterval(MO.getReg());
- if (LI.hasSubRanges()) {
- auto It = VirtLiveRegs.end();
- for (const auto &S : LI.subranges()) {
- if (!S.liveAt(SI)) {
- if (It == VirtLiveRegs.end()) {
- It = VirtLiveRegs.find(MO.getReg());
- if (It == VirtLiveRegs.end())
- llvm_unreachable("register isn't live");
+ Register Reg = MO.getReg();
+
+ if (Reg.isVirtual()) {
+ if (MO.isUse() && !MO.readsReg())
+ continue;
+ if (!UseInternalIterator && MO.isDef())
+ continue;
+ if (!SeenRegs.insert(Reg).second)
+ continue;
+ const LiveInterval &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ auto It = VirtLiveRegs.end();
+ for (const auto &S : LI.subranges()) {
+ if (!S.liveAt(SI)) {
+ if (It == VirtLiveRegs.end()) {
+ It = VirtLiveRegs.find(Reg);
+ if (It == VirtLiveRegs.end())
+ llvm_unreachable("register isn't live");
+ }
+ auto PrevMask = It->second;
+ It->second &= ~S.LaneMask;
+ CurPressure.inc(Reg, PrevMask, It->second, *MRI);
}
- auto PrevMask = It->second;
- It->second &= ~S.LaneMask;
- CurPressure.inc(MO.getReg(), PrevMask, It->second, *MRI);
}
- }
- if (It != VirtLiveRegs.end() && It->second.none())
+ if (It != VirtLiveRegs.end() && It->second.none())
+ VirtLiveRegs.erase(It);
+ } else if (!LI.liveAt(SI)) {
+ auto It = VirtLiveRegs.find(Reg);
+ if (It == VirtLiveRegs.end())
+ llvm_unreachable("register isn't live");
+ CurPressure.inc(Reg, It->second, LaneBitmask::getNone(), *MRI);
VirtLiveRegs.erase(It);
- } else if (!LI.liveAt(SI)) {
- auto It = VirtLiveRegs.find(MO.getReg());
- if (It == VirtLiveRegs.end())
- llvm_unreachable("register isn't live");
- CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI);
- VirtLiveRegs.erase(It);
- }
- }
-
- // Track physical register kills (only if enabled).
- if (TrackPhysRegs) {
- // Iterate over actual instruction operands to track which regunits are
- // killed.
- SmallSet<Register, 8> SeenRegs;
- for (const auto &MO : CurrMI->operands()) {
- if (!MO.isReg() || !MO.getReg().isPhysical())
- continue;
- Register Reg = MO.getReg();
- if (!MRI->isAllocatable(Reg) || !SeenRegs.insert(Reg).second)
+ }
+ } else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
+ if (!SeenRegs.insert(Reg).second)
continue;
-
- // Check if any unit of this register is killed and erase killed units.
- bool IsKilled = eraseKilledUnits(Reg.asMCReg(), SI);
-
- // Update pressure once per register if any unit of this register is
- // killed.
- if (IsKilled)
- CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
+ if (eraseKilledUnits(Reg.asMCReg(), SI))
+ CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
}
MaxPressure = max(MaxPressure, CurPressure);
- MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
LastTrackedMI = nullptr;
@@ -869,39 +833,25 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
const MachineInstr *CurrMI = LastTrackedMI;
- // Add new registers or mask bits (virtual registers).
+ // Add new registers or mask bits.
for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
- if (!Reg.isVirtual())
- continue;
- auto &LiveMask = VirtLiveRegs[Reg];
- auto PrevMask = LiveMask;
- LiveMask |= getDefRegMask(MO, *MRI);
- CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
- }
-
- // Add new physical register defs (only if enabled).
- if (TrackPhysRegs) {
- for (const auto &MO : CurrMI->all_defs()) {
- Register Reg = MO.getReg();
- if (!Reg.isPhysical() || !MRI->isAllocatable(Reg))
- continue;
- // Check if any unit of this register is not live before and if so,
- // insert all of the regunits into PhysLiveRegs.
+ if (Reg.isVirtual()) {
+ auto &LiveMask = VirtLiveRegs[Reg];
+ auto PrevMask = LiveMask;
+ LiveMask |= getDefRegMask(MO, *MRI);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ } else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
- if (WasNotLive && !MO.isDead())
- PhysLiveRegs.addReg(Reg);
-
- // Update pressure once per register if any unit of this register is not
- // live before.
- if (WasNotLive && !MO.isDead())
- CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ if (WasNotLive && !MO.isDead()) {
+ PhysLiveRegs.add(Reg);
+ CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ }
}
}
MaxPressure = max(MaxPressure, CurPressure);
- MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
}
bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator) {
@@ -968,7 +918,6 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
GCNRegPressure TempPressure = CurPressure;
- GCNRegPressure TempPhysPressure = CurPhysPressure;
// Process virtual register uses
for (const VRegMaskOrUnit &Use : RegOpers.Uses) {
@@ -1020,24 +969,19 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
// Process physical registers (only if enabled).
if (TrackPhysRegs) {
SmallSet<Register, 8> SeenRegs;
-
- // Process physical register defs.
+ // Physical register handling needs the registers directly to avoid
+ // aliasing, so we need to iterate over the defs and uses separately.
for (const auto &MO : MI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
!SeenRegs.insert(Reg).second)
continue;
- // Check if any unit of this register is not currently live.
bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
-
- // Update pressure once per register if any unit of this register is not
- // live before.
if (WasNotLive && !MO.isDead())
- TempPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ TempPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
- // Process physical register uses to find kills.
SeenRegs.clear();
for (const auto &MO : MI->all_uses()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
@@ -1047,18 +991,13 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
!SeenRegs.insert(Reg).second)
continue;
- // Check if any unit of this register is killed.
bool IsKilled = checkRegKilled(Reg.asMCReg(), SlotIdx);
-
- // Update pressure once per register if any unit of this register is
- // killed.
if (IsKilled)
- TempPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
+ TempPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
}
- // Return sum of virtual and physical pressure
- return TempPressure + TempPhysPressure;
+ return TempPressure;
}
bool GCNUpwardRPTracker::isValid() const {
@@ -1074,10 +1013,11 @@ bool GCNUpwardRPTracker::isValid() const {
return false;
}
- auto LISPressure = getVirtRegPressure(*MRI, LISLR);
- if (LISPressure != CurPressure) {
+ auto ComputedPressure =
+ getVirtRegPressure(*MRI, LISLR) + constructPhysRegPressure();
+ if (ComputedPressure != CurPressure) {
dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: "
- << print(CurPressure) << "LIS rpt: " << print(LISPressure);
+ << print(CurPressure) << "Computed rpt: " << print(ComputedPressure);
return false;
}
return true;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index d344d50c8749d..3ad8eb8f094b9 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -326,15 +326,41 @@ class GCNRPTracker {
protected:
const LiveIntervals &LIS;
- // Virtual register tracking
LiveRegSet VirtLiveRegs;
- GCNRegPressure CurPressure, MaxPressure;
- // Physical register tracking: Maintain clean separation between virtual and
- // physical registers. Tracking physical registers can be turned OFF with an
- // option. Uses LiveRegUnits (bit vector of live register units).
- LiveRegUnits PhysLiveRegs;
- GCNRegPressure CurPhysPressure, MaxPhysPressure;
+ // Physical register liveness: Units provides O(1) unit-level alias checks,
+ // Regs tracks which register names contributed to pressure for cheap
+ // reconstruction. Both must be kept in sync.
+ struct PhysicalRegLiveness {
+ LiveRegUnits Units;
+ SmallDenseSet<MCRegister, 16> Regs;
+
+ void init(const TargetRegisterInfo &TRI) {
+ Units.init(TRI);
+ Regs.clear();
+ }
+ void clear() {
+ Units.clear();
+ Regs.clear();
+ }
+ const BitVector &getBitVector() const { return Units.getBitVector(); }
+
+ void add(Register Reg) {
+ Units.addReg(Reg);
+ Regs.insert(Reg.asMCReg());
+ }
+ void remove(Register Reg) {
+ Units.removeReg(Reg);
+ Regs.erase(Reg.asMCReg());
+ }
+ void remove(const BitVector &KilledUnits, MCRegister Reg) {
+ Units.removeUnits(KilledUnits);
+ Regs.erase(Reg);
+ }
+ };
+ PhysicalRegLiveness PhysLiveRegs;
+
+ GCNRegPressure CurPressure, MaxPressure;
// Flag to control whether physical register tracking is active.
// Set to true when GCNTrackers are enabled, false otherwise.
@@ -352,19 +378,17 @@ class GCNRPTracker {
PhysLiveRegs.init(*SRI);
}
- // Copy constructor - PhysLiveRegs must be initialized then copied.
+ // Copy constructor - PhysLiveRegs.Units must be initialized then copied.
GCNRPTracker(const GCNRPTracker &Other)
: LIS(Other.LIS), VirtLiveRegs(Other.VirtLiveRegs),
- CurPressure(Other.CurPressure),
- MaxPressure(Other.MaxPressure),
- CurPhysPressure(Other.CurPhysPressure),
- MaxPhysPressure(Other.MaxPhysPressure),
+ CurPressure(Other.CurPressure), MaxPressure(Other.MaxPressure),
TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
MRI(Other.MRI), SRI(Other.SRI) {
if (TrackPhysRegs) {
assert(SRI && "SRI not initialized");
PhysLiveRegs.init(*SRI);
- PhysLiveRegs.addUnits(Other.PhysLiveRegs.getBitVector());
+ PhysLiveRegs.Units.addUnits(Other.PhysLiveRegs.getBitVector());
+ PhysLiveRegs.Regs = Other.PhysLiveRegs.Regs;
}
}
@@ -376,12 +400,15 @@ class GCNRPTracker {
LaneBitmask getLastUsedLanes(Register Reg, SlotIndex Pos) const;
- // Helper to check if a register unit is live at a given slot index.
+ // Check if a register unit is live at a given slot index per LIS.
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
// Check if any register unit of Reg is not currently live in PhysLiveRegs.
bool isAnyRegUnitNotLive(MCRegister Reg) const;
+ // Reconstruct physical register pressure from PhysLiveRegs.Regs.
+ GCNRegPressure constructPhysRegPressure() const;
+
// Check if Reg has any killed units at the given slot index.
bool checkRegKilled(MCRegister Reg, SlotIndex SI) const;
@@ -410,26 +437,9 @@ class GCNRPTracker {
const decltype(VirtLiveRegs) &getVirtLiveRegs() const { return VirtLiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
- void clearMaxPressure() {
- MaxPressure.clear();
- MaxPhysPressure.clear();
- }
-
- // Returns sum of virtual and physical register pressure
- GCNRegPressure getPressure() const {
- return CurPressure + CurPhysPressure;
- }
-
- // Returns only virtual register pressure
- GCNRegPressure getVirtPressure() const { return CurPressure; }
-
- // Returns only physical register pressure
- GCNRegPressure getPhysPressure() const { return CurPhysPressure; }
+ void clearMaxPressure() { MaxPressure.clear(); }
- // Returns sum of virtual and physical max pressure
- GCNRegPressure getMaxPressure() const {
- return MaxPressure + MaxPhysPressure;
- }
+ GCNRegPressure getPressure() const { return CurPressure; }
decltype(VirtLiveRegs) moveVirtLiveRegs() { return std::move(VirtLiveRegs); }
};
@@ -474,13 +484,12 @@ class GCNUpwardRPTracker : public GCNRPTracker {
/// to reported by LIS.
bool isValid() const;
- void resetMaxPressure() {
- MaxPressure = CurPressure;
- MaxPhysPressure = CurPhysPressure;
- }
+ const GCNRegPressure &getMaxPressure() const { return MaxPressure; }
+
+ void resetMaxPressure() { MaxPressure = CurPressure; }
GCNRegPressure getMaxPressureAndReset() {
- GCNRegPressure RP = getMaxPressure();
+ GCNRegPressure RP = MaxPressure;
resetMaxPressure();
return RP;
}
@@ -505,9 +514,8 @@ class GCNDownwardRPTracker : public GCNRPTracker {
/// \p return MaxPressure and clear it.
GCNRegPressure moveMaxPressure() {
- auto Res = getMaxPressure();
+ auto Res = MaxPressure;
MaxPressure.clear();
- MaxPhysPressure.clear();
return Res;
}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 2d2ff828d3ef5..496279f330063 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -11208,15 +11208,15 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 7927712f36b5c..534b22aa1c576 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -70,7 +70,7 @@ entry:
; GCN-DEBUG-LABEL: test_multiple_physregs
; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG-LABEL: test_multiple_physregs
; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
@@ -274,7 +274,7 @@ entry:
; GCN-DEBUG-LABEL: test_early_clobber_tuple
; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 8
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 8
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
; GENERIC-DEBUG-LABEL: test_early_clobber_tuple
; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
@@ -525,7 +525,7 @@ entry:
; GCN-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 16, LVGPR WT: 0, LSGPR WT: 16
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 13, LVGPR WT: 0, LSGPR WT: 16
; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
>From 9d1e1259f154f289e1252b31b2e63a8314f9b591 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Thu, 23 Apr 2026 19:42:26 -0500
Subject: [PATCH 11/25] Always clear PhysLiveRegs even when TrackPhysRegs is
false. Added debug output if assert fails.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 25 +++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index a2ca0677986ca..eb4372e6b9b1a 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -612,10 +612,11 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getVirtRegPressure(*MRI, VirtLiveRegs);
setPhysRegTracking();
- if (TrackPhysRegs) {
- PhysLiveRegs.clear();
+ // Always clear PhysLiveRegs even when TrackPhysRegs is false, to avoid
+ // stale data if physical tracking was previously enabled.
+ PhysLiveRegs.clear();
+ if (TrackPhysRegs)
PhysLiveRegs.init(*SRI);
- }
}
void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
@@ -627,10 +628,11 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
MaxPressure = CurPressure = getVirtRegPressure(MRInfo, VirtLiveRegsSet);
setPhysRegTracking();
- if (TrackPhysRegs) {
- PhysLiveRegs.clear();
+ // Always clear PhysLiveRegs even when TrackPhysRegs is false, to avoid
+ // stale data if physical tracking was previously enabled.
+ PhysLiveRegs.clear();
+ if (TrackPhysRegs)
PhysLiveRegs.init(*SRI);
- }
}
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
@@ -726,8 +728,15 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
: max(CurPressure, MaxPressure);
- assert(CurPressure ==
- getVirtRegPressure(*MRI, VirtLiveRegs) + constructPhysRegPressure());
+ auto VirtPressure = getVirtRegPressure(*MRI, VirtLiveRegs);
+ auto PhysPressure = constructPhysRegPressure();
+ assert(CurPressure == VirtPressure + PhysPressure ||
+ (dbgs() << "Pressure mismatch in recede()\nMI: " << MI
+ << "Tracked: " << print(CurPressure)
+ << "Expected: " << print(VirtPressure + PhysPressure)
+ << "Virt: " << print(VirtPressure)
+ << "Phys: " << print(PhysPressure),
+ false));
}
////////////////////////////////////////////////////////////////////////////////
>From ff3a00298aa18649eb303a0a0cfa34ce6408a7cd Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 24 Apr 2026 12:00:47 -0500
Subject: [PATCH 12/25] clang-format fix.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index eb4372e6b9b1a..3a686b7b0d2c5 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -732,10 +732,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
auto PhysPressure = constructPhysRegPressure();
assert(CurPressure == VirtPressure + PhysPressure ||
(dbgs() << "Pressure mismatch in recede()\nMI: " << MI
- << "Tracked: " << print(CurPressure)
- << "Expected: " << print(VirtPressure + PhysPressure)
- << "Virt: " << print(VirtPressure)
- << "Phys: " << print(PhysPressure),
+ << "Tracked: " << print(CurPressure) << "Expected: "
+ << print(VirtPressure + PhysPressure) << "Virt: "
+ << print(VirtPressure) << "Phys: " << print(PhysPressure),
false));
}
>From d8b6fc0e5e84ce15bf9a7a396d36b5f8a9ddaf77 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Thu, 7 May 2026 18:56:36 -0500
Subject: [PATCH 13/25] Updated remove() method to ensure registers and units
are in sync.
If two entries in the physical register tracker alias each other,
removing one of them may also remove regunits that belong to the
other entry, which is still in the tracker. This leaves the tracked
registers and units out of sync. This patch fixes the problem by
first removing the register and its units, and then re-adding the
units of every register that remains in the tracker.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 2 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 21 ++++++++++++++++-----
2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 3a686b7b0d2c5..bb5a94ee19863 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -853,7 +853,7 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
} else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
if (WasNotLive && !MO.isDead()) {
- PhysLiveRegs.add(Reg);
+ PhysLiveRegs.add(Reg.asMCReg());
CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 3ad8eb8f094b9..0928f99053046 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -345,17 +345,28 @@ class GCNRPTracker {
}
const BitVector &getBitVector() const { return Units.getBitVector(); }
- void add(Register Reg) {
+ void add(MCRegister Reg) {
Units.addReg(Reg);
- Regs.insert(Reg.asMCReg());
+ Regs.insert(Reg);
}
- void remove(Register Reg) {
+ void remove(MCRegister Reg) {
+ Regs.erase(Reg);
Units.removeReg(Reg);
- Regs.erase(Reg.asMCReg());
+ restoreSharedUnits();
}
void remove(const BitVector &KilledUnits, MCRegister Reg) {
- Units.removeUnits(KilledUnits);
Regs.erase(Reg);
+ Units.removeUnits(KilledUnits);
+ restoreSharedUnits();
+ }
+
+ private:
+ // When a register is removed, its regunits are also removed. But
+ // because of aliasing, some of those regunits may be shared with
+ // other registers still in Regs. This restores those shared regunits.
+ void restoreSharedUnits() {
+ for (MCRegister R : Regs)
+ Units.addReg(R);
}
};
PhysicalRegLiveness PhysLiveRegs;
>From b4f8fcd4f7756d6b00edaa8e3a1a898457556c05 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 8 May 2026 18:19:24 -0500
Subject: [PATCH 14/25] Added comments on limitations of physical register
tracking.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 16 +++++++++++++---
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 16 ++++++++++++++++
2 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index bb5a94ee19863..0cdc2059202e7 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -515,11 +515,14 @@ bool GCNRPTracker::checkRegKilled(MCRegister Reg, SlotIndex SI) const {
});
}
+// Known aliasing limitations (see also PhysicalRegLiveness limitations):
+// 1. If Reg is not in Regs, the early return skips any killed units and
+// no pressure decrement occurs (over-counting).
+// 2. If Reg is in Regs but only some of its units are killed, the entire
+// register is removed from Regs and the caller decrements pressure for
+// the full register, not just the killed portion (under-counting).
bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
assert(SRI && "SRI not initialized");
- // Due to aliasing, a physical register may not be present in
- // PhysLiveRegs.Regs, but one of its regunits may show up as killed. Return
- // early in this case.
if (!PhysLiveRegs.Regs.contains(Reg))
return false;
BitVector KilledUnits(PhysLiveRegs.getBitVector().size(), false);
@@ -534,6 +537,9 @@ bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
return true;
}
+// Only matches Reg by exact name in Regs. If an overlapping register is live
+// (e.g., a sub-register is in Regs when a super-register is defined, or vice
+// versa), this will not find or remove it. See PhysicalRegLiveness limitations.
bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
assert(SRI && "SRI not initialized");
if (!PhysLiveRegs.Regs.contains(Reg))
@@ -542,6 +548,10 @@ bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
return true;
}
+// Checks liveness at the unit level, but adds the full register to Regs if any
+// unit is new. When an overlapping register is already in Regs, the shared
+// units are already live but the full register is still added, leading to
+// over-counted pressure. See PhysicalRegLiveness limitations.
bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
assert(SRI && "SRI not initialized");
const BitVector &Units = PhysLiveRegs.getBitVector();
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 0928f99053046..923e191d769a5 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -331,6 +331,22 @@ class GCNRPTracker {
// Physical register liveness: Units provides O(1) unit-level alias checks,
// Regs tracks which register names contributed to pressure for cheap
// reconstruction. Both must be kept in sync.
+ //
+ // Known limitations:
+ // 1. Aliasing can cause physical register pressure to be over-counted.
+ // Regs tracks exact register names, so overlapping tuples are not
+ // recognized as related. This can manifest in three ways:
+ // a) A def of a super-register fails to kill a live sub-register in
+ // Regs, leaving over-counted pressure above the def.
+ // b) Two overlapping registers both added to Regs cause shared units
+ // to be over-counted in pressure.
+ // c) A def of a sub-register cannot partially kill a live
+ // super-register in Regs, leaving over-counted pressure when only part
+ // of it should remain.
+ // 2. Physical register live-in/live-out is not modeled. reset()
+ // initializes CurPressure from virtual registers only and clears
+ // PhysLiveRegs. Physical registers that are live through a region
+ // without being used are invisible, leading to under-counted pressure.
struct PhysicalRegLiveness {
LiveRegUnits Units;
SmallDenseSet<MCRegister, 16> Regs;
>From 14962991178ae273bbaf3ba7a7deb16ad1c3a1f3 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 11 May 2026 12:32:12 -0500
Subject: [PATCH 15/25] Use inc/dec.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 20 ++++++++++----------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 18 +++++++++++++++---
2 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 0cdc2059202e7..6667d01cad09c 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -99,8 +99,8 @@ void GCNRegPressure::inc(unsigned Reg, LaneBitmask PrevMask,
Value[RegKind] += Sign;
}
-void GCNRegPressure::inc(MCRegister Reg, bool IsAdd,
- const MachineRegisterInfo &MRI) {
+void GCNRegPressure::adjustPhysRegPressure(MCRegister Reg, bool IsAdd,
+ const MachineRegisterInfo &MRI) {
if (!MRI.isAllocatable(Reg))
return;
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
@@ -566,7 +566,7 @@ bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
GCNRegPressure GCNRPTracker::constructPhysRegPressure() const {
GCNRegPressure Res;
for (MCRegister Reg : PhysLiveRegs.Regs)
- Res.inc(Reg, /*IsAdd=*/true, *MRI);
+ Res.inc(Reg, *MRI);
return Res;
}
@@ -693,13 +693,13 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
VirtLiveRegs.erase(I);
} else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
if (MO.isEarlyClobber()) {
- ECDefPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ ECDefPressure.inc(Reg.asMCReg(), *MRI);
HasECDefs = true;
}
bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
if (WasLive)
- CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
+ CurPressure.dec(Reg.asMCReg(), *MRI);
}
}
@@ -730,7 +730,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
continue;
bool NewlyLive = insertIfNotLive(Reg.asMCReg());
if (NewlyLive)
- CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ CurPressure.inc(Reg.asMCReg(), *MRI);
}
}
@@ -829,7 +829,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
if (!SeenRegs.insert(Reg).second)
continue;
if (eraseKilledUnits(Reg.asMCReg(), SI))
- CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
+ CurPressure.dec(Reg.asMCReg(), *MRI);
}
}
@@ -864,7 +864,7 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
if (WasNotLive && !MO.isDead()) {
PhysLiveRegs.add(Reg.asMCReg());
- CurPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ CurPressure.inc(Reg.asMCReg(), *MRI);
}
}
}
@@ -997,7 +997,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
if (WasNotLive && !MO.isDead())
- TempPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ TempPressure.inc(Reg.asMCReg(), *MRI);
}
SeenRegs.clear();
@@ -1011,7 +1011,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
bool IsKilled = checkRegKilled(Reg.asMCReg(), SlotIdx);
if (IsKilled)
- TempPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
+ TempPressure.dec(Reg.asMCReg(), *MRI);
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 923e191d769a5..a799c9e44c925 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -126,15 +126,27 @@ struct GCNRegPressure {
return std::max(UnifiedSpill, ArchSpill + AGPRSpill);
}
+ /// Adjust pressure for a virtual register.
void inc(unsigned Reg,
LaneBitmask PrevMask,
LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
- /// Update pressure for a physical register (add or remove). Used when
- /// tracking physical registers.
- void inc(MCRegister Reg, bool IsAdd, const MachineRegisterInfo &MRI);
+ /// Increment pressure for a physical register.
+ void inc(MCRegister Reg, const MachineRegisterInfo &MRI) {
+ adjustPhysRegPressure(Reg, /*IsAdd=*/true, MRI);
+ }
+
+ /// Decrement pressure for a physical register.
+ void dec(MCRegister Reg, const MachineRegisterInfo &MRI) {
+ adjustPhysRegPressure(Reg, /*IsAdd=*/false, MRI);
+ }
+private:
+ void adjustPhysRegPressure(MCRegister Reg, bool IsAdd,
+ const MachineRegisterInfo &MRI);
+
+public:
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
unsigned DynamicVGPRBlockSize) const {
return getOccupancy(ST, DynamicVGPRBlockSize) >
>From 323b0a1101fbf6a926c774da57888d539154dcc4 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 11 May 2026 18:50:08 -0500
Subject: [PATCH 16/25] Added helper shouldTrackPhysReg.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 19 ++++++++-----------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 4 ++++
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 6667d01cad09c..a571e93d60f44 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -101,8 +101,7 @@ void GCNRegPressure::inc(unsigned Reg, LaneBitmask PrevMask,
void GCNRegPressure::adjustPhysRegPressure(MCRegister Reg, bool IsAdd,
const MachineRegisterInfo &MRI) {
- if (!MRI.isAllocatable(Reg))
- return;
+ assert(MRI.isAllocatable(Reg) && "expected allocatable physical register");
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
if (!RC)
@@ -691,7 +690,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
VirtLiveRegs.erase(I);
- } else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
+ } else if (shouldTrackPhysReg(Reg)) {
if (MO.isEarlyClobber()) {
ECDefPressure.inc(Reg.asMCReg(), *MRI);
HasECDefs = true;
@@ -723,10 +722,10 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// Physical register handling needs the register directly to avoid aliasing,
// so we need to iterate over all uses.
for (const MachineOperand &MO : MI.all_uses()) {
- if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
+ if (!MO.isReg() || !MO.readsReg())
continue;
Register Reg = MO.getReg();
- if (!MRI->isAllocatable(Reg))
+ if (!shouldTrackPhysReg(Reg))
continue;
bool NewlyLive = insertIfNotLive(Reg.asMCReg());
if (NewlyLive)
@@ -825,7 +824,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
CurPressure.inc(Reg, It->second, LaneBitmask::getNone(), *MRI);
VirtLiveRegs.erase(It);
}
- } else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
+ } else if (shouldTrackPhysReg(Reg)) {
if (!SeenRegs.insert(Reg).second)
continue;
if (eraseKilledUnits(Reg.asMCReg(), SI))
@@ -860,7 +859,7 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
auto PrevMask = LiveMask;
LiveMask |= getDefRegMask(MO, *MRI);
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
- } else if (TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg)) {
+ } else if (shouldTrackPhysReg(Reg)) {
bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
if (WasNotLive && !MO.isDead()) {
PhysLiveRegs.add(Reg.asMCReg());
@@ -991,8 +990,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
// aliasing, so we need to iterate over the defs and uses separately.
for (const auto &MO : MI->all_defs()) {
Register Reg = MO.getReg();
- if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
- !SeenRegs.insert(Reg).second)
+ if (!shouldTrackPhysReg(Reg) || !SeenRegs.insert(Reg).second)
continue;
bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
@@ -1005,8 +1003,7 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
Register Reg = MO.getReg();
- if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
- !SeenRegs.insert(Reg).second)
+ if (!shouldTrackPhysReg(Reg) || !SeenRegs.insert(Reg).second)
continue;
bool IsKilled = checkRegKilled(Reg.asMCReg(), SlotIdx);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index a799c9e44c925..302071ab05935 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -439,6 +439,10 @@ class GCNRPTracker {
LaneBitmask getLastUsedLanes(Register Reg, SlotIndex Pos) const;
+ bool shouldTrackPhysReg(Register Reg) const {
+ return TrackPhysRegs && Reg.isPhysical() && MRI->isAllocatable(Reg);
+ }
+
// Check if a register unit is live at a given slot index per LIS.
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
>From 037b27f9b76a9eb8420ff023e10e6722f4e8d426 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 11 May 2026 18:50:59 -0500
Subject: [PATCH 17/25] Added new test for physical register isAllocatable and
aliasing.
---
.../AMDGPU/regpressure-physreg-limits.mir | 124 ++++++++++++++++++
1 file changed, 124 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
new file mode 100644
index 0000000000000..4438d21c69efd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
@@ -0,0 +1,124 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 %s 2>&1 | FileCheck %s --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 %s 2>&1 | FileCheck %s --check-prefix=RPU-NOPHYS
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 -amdgpu-print-rp-downward %s 2>&1 | FileCheck %s --check-prefix=RPD
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 -amdgpu-print-rp-downward -amdgpu-trackers-physical-register-tracking=0 %s 2>&1 | FileCheck %s --check-prefix=RPD-NOPHYS
+
+# Tests for physical register pressure tracking edge cases:
+# 1. Non-allocatable registers (e.g. $scc) should not affect pressure.
+# 2. Aliasing: a tuple def ($sgpr10_sgpr11) does not kill a live sub-register
+# ($sgpr11) tracked under a different name, leading to over-counted pressure.
+
+# Non-allocatable physical register should not change pressure.
+---
+name: nonallocatable_physreg
+tracksRegLiveness: true
+machineFunctionInfo:
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; RPU-LABEL: name: nonallocatable_physreg
+ ; RPU: bb.0:
+ ; RPU: SGPR VGPR
+ ; RPU: 2 1
+ ; RPU: 2 1 S_NOP 0, implicit-def $scc
+ ; RPU: 2 1
+ ;
+ ; RPU-NOPHYS-LABEL: name: nonallocatable_physreg
+ ; RPU-NOPHYS: bb.0:
+ ; RPU-NOPHYS: SGPR VGPR
+ ; RPU-NOPHYS: 2 1
+ ; RPU-NOPHYS: 2 1 S_NOP 0, implicit-def $scc
+ ; RPU-NOPHYS: 2 1
+ ;
+ ; RPD-LABEL: name: nonallocatable_physreg
+ ; RPD: bb.0:
+ ; RPD: SGPR VGPR
+ ; RPD: 2 1
+ ; RPD: 2 1 S_NOP 0, implicit-def $scc
+ ; RPD: 2 1
+ ;
+ ; RPD-NOPHYS-LABEL: name: nonallocatable_physreg
+ ; RPD-NOPHYS: bb.0:
+ ; RPD-NOPHYS: SGPR VGPR
+ ; RPD-NOPHYS: 2 1
+ ; RPD-NOPHYS: 2 1 S_NOP 0, implicit-def $scc
+ ; RPD-NOPHYS: 2 1
+ bb.0:
+ liveins: $sgpr8_sgpr9
+
+ %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_NOP 0, implicit-def $scc
+ GLOBAL_STORE_DWORD_SADDR %2, %2, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_ENDPGM 0
+...
+
+# Aliasing: $sgpr10_sgpr11 is defined, then $sgpr11 (a sub-register) is used.
+# The upward tracker adds $sgpr11 to the tracker when processing the use.
+# When receding past the def of $sgpr10_sgpr11, the tracker looks for
+# $sgpr10_sgpr11 by exact name and does not find it (only $sgpr11 is
+# there), so no pressure decrement occurs. This verifies the known aliasing
+# over-count: pressure from $sgpr11 persists above the def of $sgpr10_sgpr11.
+---
+name: aliased_physreg_tuple_def_subreg_use
+tracksRegLiveness: true
+machineFunctionInfo:
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; RPU-LABEL: name: aliased_physreg_tuple_def_subreg_use
+ ; RPU: bb.0:
+ ; RPU: SGPR VGPR
+ ;
+ ; The def of $sgpr10_sgpr11 should kill $sgpr11, but the tracker
+ ; matches by exact register name and only finds $sgpr11, not
+ ; $sgpr10_sgpr11. So $sgpr11 pressure (1 SGPR) persists above the def,
+ ; giving 3 SGPRs instead of the ideal 2. This is the known aliasing
+ ; over-count.
+ ;
+ ; RPU: 3 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
+ ; RPU: 3 1
+ ; RPU: 3 1 %3:sreg_32 = COPY $sgpr11
+ ;
+ ; RPU-NOPHYS-LABEL: name: aliased_physreg_tuple_def_subreg_use
+ ; RPU-NOPHYS: bb.0:
+ ; RPU-NOPHYS: SGPR VGPR
+ ;
+ ; Without physreg tracking, no physical register pressure is counted,
+ ; so the INLINEASM shows only 2 SGPRs.
+ ;
+ ; RPU-NOPHYS: 2 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
+ ; RPU-NOPHYS: 2 1
+ ; RPU-NOPHYS: 3 1 %3:sreg_32 = COPY $sgpr11
+ ;
+ ; RPD-LABEL: name: aliased_physreg_tuple_def_subreg_use
+ ; RPD: bb.0:
+ ; RPD: SGPR VGPR
+ ;
+ ; Downward tracker: the INLINEASM def adds $sgpr10_sgpr11 (2 SGPRs) to
+ ; the tracker. At the COPY, the tracker is not able to find
+ ; $sgpr11 (only $sgpr10_sgpr11 is there), so no decrement occurs.
+ ; The tuple pressure (2 SGPRs) persists on top of the 2 virtual SGPRs, and %3 adds 1 more, giving 5 SGPRs.
+ ;
+ ; RPD: 4 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
+ ; RPD: 4 1
+ ; RPD: 5 1 %3:sreg_32 = COPY $sgpr11
+ ;
+ ; RPD-NOPHYS-LABEL: name: aliased_physreg_tuple_def_subreg_use
+ ; RPD-NOPHYS: bb.0:
+ ; RPD-NOPHYS: SGPR VGPR
+ ;
+ ; RPD-NOPHYS: 2 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
+ ; RPD-NOPHYS: 2 1
+ ; RPD-NOPHYS: 3 1 %3:sreg_32 = COPY $sgpr11
+ bb.0:
+ liveins: $sgpr8_sgpr9
+
+ %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ INLINEASM &"s_mov_b64 $0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr10_sgpr11
+ %3:sreg_32 = COPY $sgpr11
+ %4:vgpr_32 = COPY %3
+ GLOBAL_STORE_DWORD_SADDR %2, %4, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_ENDPGM 0
+...
>From 2fe69874c02f980520c97f2b84563039a7e16044 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 11 May 2026 20:45:01 -0500
Subject: [PATCH 18/25] Addressed review comments.
- Reverted unintended comment removal.
- Fixed compiler warnings in NDEBUG mode.
- Removed unused method getVirtLiveRegs, kept pre-existing getLiveRegs.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 3 +++
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 1 -
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index a571e93d60f44..626cd3ba642d9 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -674,6 +674,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (Reg.isVirtual()) {
LaneBitmask DefMask = getDefRegMask(MO, *MRI);
+ // Treat a def as fully live at the moment of definition: keep a record.
if (MO.isEarlyClobber()) {
ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
HasECDefs = true;
@@ -737,6 +738,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
: max(CurPressure, MaxPressure);
+#ifndef NDEBUG
auto VirtPressure = getVirtRegPressure(*MRI, VirtLiveRegs);
auto PhysPressure = constructPhysRegPressure();
assert(CurPressure == VirtPressure + PhysPressure ||
@@ -745,6 +747,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
<< print(VirtPressure + PhysPressure) << "Virt: "
<< print(VirtPressure) << "Phys: " << print(PhysPressure),
false));
+#endif
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 302071ab05935..5a0ba3934e72a 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -477,7 +477,6 @@ class GCNRPTracker {
// live regs for the current state
const decltype(VirtLiveRegs) &getLiveRegs() const { return VirtLiveRegs; }
- const decltype(VirtLiveRegs) &getVirtLiveRegs() const { return VirtLiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
void clearMaxPressure() { MaxPressure.clear(); }
>From 1f43a8d6fba0ffd1e819f2ec9843099be4f56457 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 12 May 2026 13:27:05 -0500
Subject: [PATCH 19/25] Added PhysicalRegLiveness ctor, removed conditional
init.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 4 ----
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 22 ++++++++++------------
2 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 626cd3ba642d9..eb92f7f383566 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -624,8 +624,6 @@ void GCNRPTracker::reset(const MachineInstr &MI,
// Always clear PhysLiveRegs even when TrackPhysRegs is false, to avoid
// stale data if physical tracking was previously enabled.
PhysLiveRegs.clear();
- if (TrackPhysRegs)
- PhysLiveRegs.init(*SRI);
}
void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
@@ -640,8 +638,6 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
// Always clear PhysLiveRegs even when TrackPhysRegs is false, to avoid
// stale data if physical tracking was previously enabled.
PhysLiveRegs.clear();
- if (TrackPhysRegs)
- PhysLiveRegs.init(*SRI);
}
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 5a0ba3934e72a..5a3fed3824acb 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -363,10 +363,11 @@ class GCNRPTracker {
LiveRegUnits Units;
SmallDenseSet<MCRegister, 16> Regs;
- void init(const TargetRegisterInfo &TRI) {
+ PhysicalRegLiveness() = delete;
+ explicit PhysicalRegLiveness(const TargetRegisterInfo &TRI) {
Units.init(TRI);
- Regs.clear();
}
+
void clear() {
Units.clear();
Regs.clear();
@@ -397,6 +398,8 @@ class GCNRPTracker {
Units.addReg(R);
}
};
+ mutable const MachineRegisterInfo *MRI = nullptr;
+ const SIRegisterInfo *SRI = nullptr;
PhysicalRegLiveness PhysLiveRegs;
GCNRegPressure CurPressure, MaxPressure;
@@ -406,26 +409,21 @@ class GCNRPTracker {
bool TrackPhysRegs = false;
const MachineInstr *LastTrackedMI = nullptr;
- mutable const MachineRegisterInfo *MRI = nullptr;
- const SIRegisterInfo *SRI = nullptr;
GCNRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
: LIS(LIS), MRI(&MRI),
- SRI(static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo())) {
+ SRI(static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo())),
+ PhysLiveRegs(*SRI) {
setPhysRegTracking();
- if (TrackPhysRegs)
- PhysLiveRegs.init(*SRI);
}
- // Copy constructor - PhysLiveRegs.Units must be initialized then copied.
GCNRPTracker(const GCNRPTracker &Other)
: LIS(Other.LIS), VirtLiveRegs(Other.VirtLiveRegs),
+ MRI(Other.MRI), SRI(Other.SRI), PhysLiveRegs(*SRI),
CurPressure(Other.CurPressure), MaxPressure(Other.MaxPressure),
- TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
- MRI(Other.MRI), SRI(Other.SRI) {
+ TrackPhysRegs(Other.TrackPhysRegs),
+ LastTrackedMI(Other.LastTrackedMI) {
if (TrackPhysRegs) {
- assert(SRI && "SRI not initialized");
- PhysLiveRegs.init(*SRI);
PhysLiveRegs.Units.addUnits(Other.PhysLiveRegs.getBitVector());
PhysLiveRegs.Regs = Other.PhysLiveRegs.Regs;
}
>From 29f397ff369a2d581f5382b6ce6f8f2771c5f22e Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 12 May 2026 14:44:29 -0500
Subject: [PATCH 20/25] Addressed review comments.
- Removed copy ctor.
- Removed isReg() query for all_uses().
- Simplify code by using isAnyRegUnitNotLive.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 16 +++++-----------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 12 ------------
2 files changed, 5 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index eb92f7f383566..74ec096469fc4 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -552,14 +552,10 @@ bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
// units are already live but the full register is still added, leading to
// over-counted pressure. See PhysicalRegLiveness limitations.
bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
- assert(SRI && "SRI not initialized");
- const BitVector &Units = PhysLiveRegs.getBitVector();
- bool NewlyLive = llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
- return !Units.test(static_cast<unsigned>(Unit));
- });
- if (NewlyLive)
- PhysLiveRegs.add(Reg);
- return NewlyLive;
+ if (!isAnyRegUnitNotLive(Reg))
+ return false;
+ PhysLiveRegs.add(Reg);
+ return true;
}
GCNRegPressure GCNRPTracker::constructPhysRegPressure() const {
@@ -719,7 +715,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// Physical register handling needs the register directly to avoid aliasing,
// so we need to iterate over all uses.
for (const MachineOperand &MO : MI.all_uses()) {
- if (!MO.isReg() || !MO.readsReg())
+ if (!MO.readsReg())
continue;
Register Reg = MO.getReg();
if (!shouldTrackPhysReg(Reg))
@@ -999,8 +995,6 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
SeenRegs.clear();
for (const auto &MO : MI->all_uses()) {
- if (!MO.isReg() || !MO.getReg().isPhysical())
- continue;
Register Reg = MO.getReg();
if (!shouldTrackPhysReg(Reg) || !SeenRegs.insert(Reg).second)
continue;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 5a3fed3824acb..7081609c14c68 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -417,18 +417,6 @@ class GCNRPTracker {
setPhysRegTracking();
}
- GCNRPTracker(const GCNRPTracker &Other)
- : LIS(Other.LIS), VirtLiveRegs(Other.VirtLiveRegs),
- MRI(Other.MRI), SRI(Other.SRI), PhysLiveRegs(*SRI),
- CurPressure(Other.CurPressure), MaxPressure(Other.MaxPressure),
- TrackPhysRegs(Other.TrackPhysRegs),
- LastTrackedMI(Other.LastTrackedMI) {
- if (TrackPhysRegs) {
- PhysLiveRegs.Units.addUnits(Other.PhysLiveRegs.getBitVector());
- PhysLiveRegs.Regs = Other.PhysLiveRegs.Regs;
- }
- }
-
void reset(const MachineInstr &MI, const LiveRegSet *VirtLiveRegsCopy,
bool After);
>From a26bdd263b8a335b84260aeba44f1448d4bd5a20 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Wed, 13 May 2026 13:52:02 -0500
Subject: [PATCH 21/25] Remove tracking of registers from physical GCN tracker.
The tracker is now just a bitvector.
All tracking is now at the regunit level, handling aliasing accurately.
Tuple and raw pressure are adjusted by the same delta.
Updated tests accordingly.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 267 ++++++++----------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 117 +++-----
.../AMDGPU/regpressure-physreg-limits.mir | 34 ++-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 2 +-
4 files changed, 164 insertions(+), 256 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 74ec096469fc4..dbcd820bc046d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,12 +14,8 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -99,23 +95,32 @@ void GCNRegPressure::inc(unsigned Reg, LaneBitmask PrevMask,
Value[RegKind] += Sign;
}
-void GCNRegPressure::adjustPhysRegPressure(MCRegister Reg, bool IsAdd,
- const MachineRegisterInfo &MRI) {
- assert(MRI.isAllocatable(Reg) && "expected allocatable physical register");
- const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- if (!RC)
- return;
- const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
- unsigned RegKind = getRegKind(RC, STI);
- unsigned NumRegs = divideCeil(TRI->getRegSizeInBits(*RC), 32);
- int Sign = IsAdd ? 1 : -1;
-
- if (TRI->getRegSizeInBits(*RC) != 32) {
- unsigned TupleIdx = TOTAL_KINDS + RegKind;
- Value[TupleIdx] += Sign * TRI->getRegClassWeight(RC).RegWeight;
+unsigned GCNRegPressure::pressureSetToRegKind(unsigned PSetID) {
+ switch (PSetID) {
+ case AMDGPU::RegisterPressureSets::SReg_32:
+ return SGPR;
+ case AMDGPU::RegisterPressureSets::AGPR_32:
+ return AGPR;
+ case AMDGPU::RegisterPressureSets::VGPR_32:
+ return VGPR;
}
- Value[RegKind] += Sign * static_cast<int>(NumRegs);
+ llvm_unreachable("unexpected pressure set");
+}
+
+// Adjusts both raw count and tuple weight per unit. Raw count and
+// tuple weight receive identical increments. This means 32-bit physical
+// registers contribute to tuple weight (unlike virtual registers where only
+// tuples > 32-bit contribute).
+void GCNRegPressure::adjustPhysUnitPressure(MCRegUnit Unit, bool IsAdd,
+ const SIRegisterInfo &SRI) {
+ const int *PSetIDs = SRI.getRegUnitPressureSets(Unit);
+ if (PSetIDs[0] == -1)
+ return;
+ assert(PSetIDs[1] == -1 && "expected single pressure set per unit");
+ unsigned Kind = pressureSetToRegKind(PSetIDs[0]);
+ int Delta = (IsAdd ? 1 : -1) * static_cast<int>(SRI.getRegUnitWeight(Unit));
+ Value[Kind] += Delta;
+ Value[TOTAL_KINDS + Kind] += Delta;
}
namespace {
@@ -498,70 +503,46 @@ bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
return !LR || LR->liveAt(SI);
}
-bool GCNRPTracker::isAnyRegUnitNotLive(MCRegister Reg) const {
+void GCNRPTracker::addUnitsAndIncPressure(MCRegister Reg,
+ GCNRegPressure &Pressure) {
assert(SRI && "SRI not initialized");
- const BitVector &Units = PhysLiveRegs.getBitVector();
- return llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
- return !Units.test(static_cast<unsigned>(Unit));
- });
-}
-
-bool GCNRPTracker::checkRegKilled(MCRegister Reg, SlotIndex SI) const {
- assert(SRI && "SRI not initialized");
- const BitVector &Units = PhysLiveRegs.getBitVector();
- return llvm::any_of(SRI->regunits(Reg), [&](MCRegUnit Unit) {
- return Units.test(static_cast<unsigned>(Unit)) && !isUnitLiveAt(Unit, SI);
- });
+ for (MCRegUnit Unit : SRI->regunits(Reg)) {
+ unsigned U = static_cast<unsigned>(Unit);
+ if (!PhysLiveRegUnits.test(U)) {
+ PhysLiveRegUnits.set(U);
+ Pressure.inc(Unit, *SRI);
+ }
+ }
}
-// Known aliasing limitations (see also PhysicalRegLiveness limitations):
-// 1. If Reg is not in Regs, the early return skips any killed units and
-// no pressure decrement occurs (over-counting).
-// 2. If Reg is in Regs but only some of its units are killed, the entire
-// register is removed from Regs and the caller decrements pressure for
-// the full register, not just the killed portion (under-counting).
-bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
+void GCNRPTracker::removeUnitsAndDecPressure(MCRegister Reg,
+ GCNRegPressure &Pressure) {
assert(SRI && "SRI not initialized");
- if (!PhysLiveRegs.Regs.contains(Reg))
- return false;
- BitVector KilledUnits(PhysLiveRegs.getBitVector().size(), false);
for (MCRegUnit Unit : SRI->regunits(Reg)) {
unsigned U = static_cast<unsigned>(Unit);
- if (PhysLiveRegs.getBitVector().test(U) && !isUnitLiveAt(Unit, SI))
- KilledUnits.set(U);
+ if (PhysLiveRegUnits.test(U)) {
+ PhysLiveRegUnits.reset(U);
+ Pressure.dec(Unit, *SRI);
+ }
}
- if (KilledUnits.none())
- return false;
- PhysLiveRegs.remove(KilledUnits, Reg);
- return true;
}
-// Only matches Reg by exact name in Regs. If an overlapping register is live
-// (e.g., a sub-register is in Regs when a super-register is defined, or vice
-// versa), this will not find or remove it. See PhysicalRegLiveness limitations.
-bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
+void GCNRPTracker::removeKilledUnitsAndDecPressure(MCRegister Reg, SlotIndex SI,
+ GCNRegPressure &Pressure) {
assert(SRI && "SRI not initialized");
- if (!PhysLiveRegs.Regs.contains(Reg))
- return false;
- PhysLiveRegs.remove(Reg);
- return true;
-}
-
-// Checks liveness at the unit level, but adds the full register to Regs if any
-// unit is new. When an overlapping register is already in Regs, the shared
-// units are already live but the full register is still added, leading to
-// over-counted pressure. See PhysicalRegLiveness limitations.
-bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
- if (!isAnyRegUnitNotLive(Reg))
- return false;
- PhysLiveRegs.add(Reg);
- return true;
+ for (MCRegUnit Unit : SRI->regunits(Reg)) {
+ unsigned U = static_cast<unsigned>(Unit);
+ if (PhysLiveRegUnits.test(U) && !isUnitLiveAt(Unit, SI)) {
+ PhysLiveRegUnits.reset(U);
+ Pressure.dec(Unit, *SRI);
+ }
+ }
}
GCNRegPressure GCNRPTracker::constructPhysRegPressure() const {
GCNRegPressure Res;
- for (MCRegister Reg : PhysLiveRegs.Regs)
- Res.inc(Reg, *MRI);
+ for (unsigned U : PhysLiveRegUnits.set_bits())
+ Res.inc(static_cast<MCRegUnit>(U), *SRI);
return Res;
}
@@ -617,9 +598,9 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getVirtRegPressure(*MRI, VirtLiveRegs);
setPhysRegTracking();
- // Always clear PhysLiveRegs even when TrackPhysRegs is false, to avoid
+ // Always clear PhysLiveRegUnits even when TrackPhysRegs is false, to avoid
// stale data if physical tracking was previously enabled.
- PhysLiveRegs.clear();
+ PhysLiveRegUnits.reset();
}
void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
@@ -631,9 +612,9 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
MaxPressure = CurPressure = getVirtRegPressure(MRInfo, VirtLiveRegsSet);
setPhysRegTracking();
- // Always clear PhysLiveRegs even when TrackPhysRegs is false, to avoid
+ // Always clear PhysLiveRegUnits even when TrackPhysRegs is false, to avoid
// stale data if physical tracking was previously enabled.
- PhysLiveRegs.clear();
+ PhysLiveRegUnits.reset();
}
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
@@ -685,13 +666,12 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
VirtLiveRegs.erase(I);
} else if (shouldTrackPhysReg(Reg)) {
if (MO.isEarlyClobber()) {
- ECDefPressure.inc(Reg.asMCReg(), *MRI);
+ for (MCRegUnit Unit : SRI->regunits(Reg.asMCReg()))
+ ECDefPressure.inc(Unit, *SRI);
HasECDefs = true;
}
- bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
- if (WasLive)
- CurPressure.dec(Reg.asMCReg(), *MRI);
+ removeUnitsAndDecPressure(Reg.asMCReg(), CurPressure);
}
}
@@ -712,17 +692,13 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
}
if (TrackPhysRegs) {
- // Physical register handling needs the register directly to avoid aliasing,
- // so we need to iterate over all uses.
for (const MachineOperand &MO : MI.all_uses()) {
if (!MO.readsReg())
continue;
Register Reg = MO.getReg();
if (!shouldTrackPhysReg(Reg))
continue;
- bool NewlyLive = insertIfNotLive(Reg.asMCReg());
- if (NewlyLive)
- CurPressure.inc(Reg.asMCReg(), *MRI);
+ addUnitsAndIncPressure(Reg.asMCReg(), CurPressure);
}
}
@@ -822,8 +798,7 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
} else if (shouldTrackPhysReg(Reg)) {
if (!SeenRegs.insert(Reg).second)
continue;
- if (eraseKilledUnits(Reg.asMCReg(), SI))
- CurPressure.dec(Reg.asMCReg(), *MRI);
+ removeKilledUnitsAndDecPressure(Reg.asMCReg(), SI, CurPressure);
}
}
@@ -855,11 +830,8 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
LiveMask |= getDefRegMask(MO, *MRI);
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
} else if (shouldTrackPhysReg(Reg)) {
- bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
- if (WasNotLive && !MO.isDead()) {
- PhysLiveRegs.add(Reg.asMCReg());
- CurPressure.inc(Reg.asMCReg(), *MRI);
- }
+ if (!MO.isDead())
+ addUnitsAndIncPressure(Reg.asMCReg(), CurPressure);
}
}
@@ -931,77 +903,62 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
GCNRegPressure TempPressure = CurPressure;
- // Process virtual register uses
+ // Process uses: decrement pressure for last-use lanes (virtual) or
+ // killed units (physical).
for (const VRegMaskOrUnit &Use : RegOpers.Uses) {
- if (!Use.VRegOrUnit.isVirtualReg())
- continue;
- Register Reg = Use.VRegOrUnit.asVirtualReg();
- LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
- if (LastUseMask.none())
- continue;
- // The LastUseMask is queried from the liveness information of instruction
- // which may be further down the schedule. Some lanes may actually not be
- // last uses for the current position.
- // FIXME: allow the caller to pass in the list of vreg uses that remain
- // to be bottom-scheduled to avoid searching uses at each query.
- SlotIndex CurrIdx;
- const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_iterator IdxPos = skipDebugInstructionsForward(
- LastTrackedMI ? LastTrackedMI : MBB->begin(), MBB->end());
- if (IdxPos == MBB->end()) {
- CurrIdx = LIS.getMBBEndIdx(MBB);
- } else {
- CurrIdx = LIS.getInstructionIndex(*IdxPos).getRegSlot();
- }
-
- LastUseMask =
- findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, TRI, &LIS);
- if (LastUseMask.none())
- continue;
-
- auto It = VirtLiveRegs.find(Reg);
- LaneBitmask LiveMask =
- It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
- LaneBitmask NewMask = LiveMask & ~LastUseMask;
- TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
- }
-
- // Generate liveness for virtual register defs.
- for (const VRegMaskOrUnit &Def : RegOpers.Defs) {
- if (!Def.VRegOrUnit.isVirtualReg())
- continue;
- Register Reg = Def.VRegOrUnit.asVirtualReg();
- auto It = VirtLiveRegs.find(Reg);
- LaneBitmask LiveMask =
- It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
- LaneBitmask NewMask = LiveMask | Def.LaneMask;
- TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
- }
+ if (Use.VRegOrUnit.isVirtualReg()) {
+ Register Reg = Use.VRegOrUnit.asVirtualReg();
+ LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
+ if (LastUseMask.none())
+ continue;
+ // The LastUseMask is queried from the liveness information of instruction
+ // which may be further down the schedule. Some lanes may actually not be
+ // last uses for the current position.
+ // FIXME: allow the caller to pass in the list of vreg uses that remain
+ // to be bottom-scheduled to avoid searching uses at each query.
+ SlotIndex CurrIdx;
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_iterator IdxPos = skipDebugInstructionsForward(
+ LastTrackedMI ? LastTrackedMI : MBB->begin(), MBB->end());
+ if (IdxPos == MBB->end()) {
+ CurrIdx = LIS.getMBBEndIdx(MBB);
+ } else {
+ CurrIdx = LIS.getInstructionIndex(*IdxPos).getRegSlot();
+ }
- // Process physical registers (only if enabled).
- if (TrackPhysRegs) {
- SmallSet<Register, 8> SeenRegs;
- // Physical register handling needs the registers directly to avoid
- // aliasing, so we need to iterate over the defs and uses separately.
- for (const auto &MO : MI->all_defs()) {
- Register Reg = MO.getReg();
- if (!shouldTrackPhysReg(Reg) || !SeenRegs.insert(Reg).second)
+ LastUseMask =
+ findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, TRI, &LIS);
+ if (LastUseMask.none())
continue;
- bool WasNotLive = isAnyRegUnitNotLive(Reg.asMCReg());
- if (WasNotLive && !MO.isDead())
- TempPressure.inc(Reg.asMCReg(), *MRI);
+ auto It = VirtLiveRegs.find(Reg);
+ LaneBitmask LiveMask =
+ It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
+ LaneBitmask NewMask = LiveMask & ~LastUseMask;
+ TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ } else if (TrackPhysRegs) {
+ MCRegUnit Unit = Use.VRegOrUnit.asMCRegUnit();
+ unsigned U = static_cast<unsigned>(Unit);
+ if (PhysLiveRegUnits.test(U) && !isUnitLiveAt(Unit, SlotIdx))
+ TempPressure.dec(Unit, *SRI);
}
+ }
- SeenRegs.clear();
- for (const auto &MO : MI->all_uses()) {
- Register Reg = MO.getReg();
- if (!shouldTrackPhysReg(Reg) || !SeenRegs.insert(Reg).second)
- continue;
-
- bool IsKilled = checkRegKilled(Reg.asMCReg(), SlotIdx);
- if (IsKilled)
- TempPressure.dec(Reg.asMCReg(), *MRI);
+ // Process defs: increment pressure for new lanes (virtual) or
+ // new units (physical).
+ for (const VRegMaskOrUnit &Def : RegOpers.Defs) {
+ if (Def.VRegOrUnit.isVirtualReg()) {
+ Register Reg = Def.VRegOrUnit.asVirtualReg();
+ auto It = VirtLiveRegs.find(Reg);
+ LaneBitmask LiveMask =
+ It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
+ LaneBitmask NewMask = LiveMask | Def.LaneMask;
+ TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ } else if (TrackPhysRegs) {
+ MCRegUnit Unit = Def.VRegOrUnit.asMCRegUnit();
+ unsigned U = static_cast<unsigned>(Unit);
+ if (!PhysLiveRegUnits.test(U))
+ TempPressure.inc(Unit, *SRI);
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 7081609c14c68..9d7ec2d557eda 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -18,8 +18,8 @@
#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
#include "GCNSubtarget.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
#include <array>
@@ -132,19 +132,20 @@ struct GCNRegPressure {
LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
- /// Increment pressure for a physical register.
- void inc(MCRegister Reg, const MachineRegisterInfo &MRI) {
- adjustPhysRegPressure(Reg, /*IsAdd=*/true, MRI);
+ /// Increment pressure for a physical register unit.
+ void inc(MCRegUnit Unit, const SIRegisterInfo &SRI) {
+ adjustPhysUnitPressure(Unit, /*IsAdd=*/true, SRI);
}
- /// Decrement pressure for a physical register.
- void dec(MCRegister Reg, const MachineRegisterInfo &MRI) {
- adjustPhysRegPressure(Reg, /*IsAdd=*/false, MRI);
+ /// Decrement pressure for a physical register unit.
+ void dec(MCRegUnit Unit, const SIRegisterInfo &SRI) {
+ adjustPhysUnitPressure(Unit, /*IsAdd=*/false, SRI);
}
private:
- void adjustPhysRegPressure(MCRegister Reg, bool IsAdd,
- const MachineRegisterInfo &MRI);
+ static unsigned pressureSetToRegKind(unsigned PSetID);
+ void adjustPhysUnitPressure(MCRegUnit Unit, bool IsAdd,
+ const SIRegisterInfo &SRI);
public:
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
@@ -337,70 +338,21 @@ class GCNRPTracker {
protected:
const LiveIntervals &LIS;
+ mutable const MachineRegisterInfo *MRI = nullptr;
+ const SIRegisterInfo *SRI = nullptr;
LiveRegSet VirtLiveRegs;
- // Physical register liveness: Units provides O(1) unit-level alias checks,
- // Regs tracks which register names contributed to pressure for cheap
- // reconstruction. Both must be kept in sync.
+ // Physical register liveness tracked at the register-unit level.
+ // Each bit corresponds to a register unit. This avoids aliasing issues
+ // since overlapping physical registers share the same underlying units.
//
- // Known limitations:
- // 1. Aliasing can cause physical register pressure to be over-counted.
- // Regs tracks exact register names, so overlapping tuples are not
- // recognized as related. This can manifest in three ways:
- // a) A def of a super-register fails to kill a live sub-register in
- // Regs, leaving over-counted pressure above the def.
- // b) Two overlapping registers both added to Regs cause shared units
- // to be over-counted in pressure.
- // c) A def of a sub-register cannot partially kill a live
- // super-register in Regs, leaving over-counted pressure when only part
- // of it should remain.
- // 2. Physical register live-in/live-out is not modeled. reset()
- // initializes CurPressure from virtual registers only and clears
- // PhysLiveRegs. Physical registers that are live through a region
- // without being used are invisible, leading to under-counted pressure.
- struct PhysicalRegLiveness {
- LiveRegUnits Units;
- SmallDenseSet<MCRegister, 16> Regs;
-
- PhysicalRegLiveness() = delete;
- explicit PhysicalRegLiveness(const TargetRegisterInfo &TRI) {
- Units.init(TRI);
- }
-
- void clear() {
- Units.clear();
- Regs.clear();
- }
- const BitVector &getBitVector() const { return Units.getBitVector(); }
-
- void add(MCRegister Reg) {
- Units.addReg(Reg);
- Regs.insert(Reg);
- }
- void remove(MCRegister Reg) {
- Regs.erase(Reg);
- Units.removeReg(Reg);
- restoreSharedUnits();
- }
- void remove(const BitVector &KilledUnits, MCRegister Reg) {
- Regs.erase(Reg);
- Units.removeUnits(KilledUnits);
- restoreSharedUnits();
- }
-
- private:
- // When a register is removed, its regunits are also removed. But
- // because of aliasing, some of those regunits may be shared with
- // other registers still in Regs. This restores those shared regunits.
- void restoreSharedUnits() {
- for (MCRegister R : Regs)
- Units.addReg(R);
- }
- };
- mutable const MachineRegisterInfo *MRI = nullptr;
- const SIRegisterInfo *SRI = nullptr;
- PhysicalRegLiveness PhysLiveRegs;
+ // Known limitation:
+ // Physical register live-in/live-out is not modeled. reset()
+ // initializes CurPressure from virtual registers only and clears
+ // PhysLiveRegUnits. Physical registers that are live through a region
+ // without being used are invisible, leading to under-counted pressure.
+ BitVector PhysLiveRegUnits;
GCNRegPressure CurPressure, MaxPressure;
@@ -413,7 +365,7 @@ class GCNRPTracker {
GCNRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
: LIS(LIS), MRI(&MRI),
SRI(static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo())),
- PhysLiveRegs(*SRI) {
+ PhysLiveRegUnits(SRI->getNumRegUnits()) {
setPhysRegTracking();
}
@@ -432,25 +384,20 @@ class GCNRPTracker {
// Check if a register unit is live at a given slot index per LIS.
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
- // Check if any register unit of Reg is not currently live in PhysLiveRegs.
- bool isAnyRegUnitNotLive(MCRegister Reg) const;
-
- // Reconstruct physical register pressure from PhysLiveRegs.Regs.
+ // Construct physical register pressure from PhysLiveRegUnits.
GCNRegPressure constructPhysRegPressure() const;
- // Check if Reg has any killed units at the given slot index.
- bool checkRegKilled(MCRegister Reg, SlotIndex SI) const;
-
- // Check if Reg has any killed units and erase them from PhysLiveRegs.
- bool eraseKilledUnits(MCRegister Reg, SlotIndex SI);
+ // Add units of Reg that are not already live. Increases Pressure for each
+ // newly live unit.
+ void addUnitsAndIncPressure(MCRegister Reg, GCNRegPressure &Pressure);
- // Erase all live units of Reg from PhysLiveRegs.
- // Returns true if any unit was live (and thus erased).
- bool eraseAllLiveUnits(MCRegister Reg);
+ // Remove all live units of Reg. Decreases Pressure for each removed unit.
+ void removeUnitsAndDecPressure(MCRegister Reg, GCNRegPressure &Pressure);
- // Insert units of Reg into PhysLiveRegs if not already live.
- // Returns true if any unit was newly inserted.
- bool insertIfNotLive(MCRegister Reg);
+ // Remove units of Reg that are currently live but killed at SI.
+ // Decreases Pressure for each killed unit.
+ void removeKilledUnitsAndDecPressure(MCRegister Reg, SlotIndex SI,
+ GCNRegPressure &Pressure);
public:
// Enable physical register tracking only if both GCNTrackers and
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
index 4438d21c69efd..bb5f984c60a3e 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
@@ -54,11 +54,10 @@ body: |
...
# Aliasing: $sgpr10_sgpr11 is defined, then $sgpr11 (a sub-register) is used.
-# The upward tracker adds $sgpr11 to the tracker when processing the use.
-# When receding past the def of $sgpr10_sgpr11, the tracker looks for
-# $sgpr10_sgpr11 by exact name and does not find it (only $sgpr11 is
-# there), so no pressure decrement occurs. This verifies the known aliasing
-# over-count: pressure from $sgpr11 persists above the def of $sgpr10_sgpr11.
+# Unit-level tracking correctly handles this: the upward tracker adds
+# $sgpr11's unit when processing the use, and when receding past the def of
+# $sgpr10_sgpr11, it finds and removes $sgpr11's unit, correctly decrementing
+# pressure.
---
name: aliased_physreg_tuple_def_subreg_use
tracksRegLiveness: true
@@ -69,12 +68,10 @@ body: |
; RPU: bb.0:
; RPU: SGPR VGPR
;
- ; The def of $sgpr10_sgpr11 should kill $sgpr11, but the tracker
- ; matches by exact register name and only finds $sgpr11, not
- ; $sgpr10_sgpr11. So $sgpr11 pressure (1 SGPR) persists above the def,
- ; giving 3 SGPRs instead of the ideal 2. This is the known aliasing
- ; over-count.
+ ; The def of $sgpr10_sgpr11 correctly kills $sgpr11's unit via
+ ; unit-level tracking, reducing SGPR pressure from 3 to 2.
;
+ ; RPU: 2 1
; RPU: 3 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
; RPU: 3 1
; RPU: 3 1 %3:sreg_32 = COPY $sgpr11
@@ -84,8 +81,9 @@ body: |
; RPU-NOPHYS: SGPR VGPR
;
; Without physreg tracking, no physical register pressure is counted,
- ; so the INLINEASM shows only 2 SGPRs.
+ ; so the INLINEASM shows no changes in pressure.
;
+ ; RPU-NOPHYS: 2 1
; RPU-NOPHYS: 2 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
; RPU-NOPHYS: 2 1
; RPU-NOPHYS: 3 1 %3:sreg_32 = COPY $sgpr11
@@ -94,11 +92,13 @@ body: |
; RPD: bb.0:
; RPD: SGPR VGPR
;
- ; Downward tracker: the INLINEASM def adds $sgpr10_sgpr11 (2 SGPRs) to
- ; the tracker. At the COPY, the tracker is not able to find
- ; $sgpr11 (only $sgpr10_sgpr11 is there), so no decrement occurs.
- ; The tuple pressure (2 SGPRs) persists, and %3 adds 1 more → 5 SGPRs.
+ ; Downward tracker: the INLINEASM def adds $sgpr10_sgpr11 (2 units =
+ ; 2 SGPRs). At the COPY use of $sgpr11, the corresponding reg-unit
+ ; is not found in the cached live ranges, so the tracker conservatively
+ ; keeps the unit live. The virtual register def then increases
+ ; pressure from 4 to 5.
;
+ ; RPD: 2 1
; RPD: 4 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
; RPD: 4 1
; RPD: 5 1 %3:sreg_32 = COPY $sgpr11
@@ -107,6 +107,10 @@ body: |
; RPD-NOPHYS: bb.0:
; RPD-NOPHYS: SGPR VGPR
;
+ ; Without physreg tracking, no physical register pressure is counted,
+ ; so the INLINEASM shows no changes in pressure.
+ ;
+ ; RPD-NOPHYS: 2 1
; RPD-NOPHYS: 2 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
; RPD-NOPHYS: 2 1
; RPD-NOPHYS: 3 1 %3:sreg_32 = COPY $sgpr11
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 534b22aa1c576..7c947f4a942a6 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -273,7 +273,7 @@ entry:
; The input s12 and early-clobber output s[10:11] have distinct live ranges.
; GCN-DEBUG-LABEL: test_early_clobber_tuple
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 8
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 9
; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
; GENERIC-DEBUG-LABEL: test_early_clobber_tuple
>From 878f803d14af233b6e154c8ed32365019abd9542 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 12 Jun 2026 12:45:47 -0500
Subject: [PATCH 22/25] Adds support for physical register liveins/liveouts in
GCN trackers.
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 9 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 48 +++++++-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 58 +++++++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 26 ++--
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 6 +
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 9 +-
.../machine-scheduler-sink-trivial-remats.mir | 98 +++++++--------
.../AMDGPU/regpressure-physreg-limits.mir | 115 +++++++++++++++++-
.../CodeGen/AMDGPU/sched-physreg-liveins.mir | 68 +++++++++++
9 files changed, 356 insertions(+), 81 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/sched-physreg-liveins.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 09147452ef4a5..59d9f48faa0c8 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -257,7 +257,7 @@ GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
auto AfterBottomMI = std::next(BottomMI);
if (AfterBottomMI == BBEnd ||
&*AfterBottomMI != UPTracker.getLastTrackedMI()) {
- UPTracker.reset(*BottomMI);
+ UPTracker.reset(*BottomMI, Begin->getParent());
} else {
assert(UPTracker.isValid());
}
@@ -277,16 +277,17 @@ GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
template <typename Range> GCNRegPressure
GCNIterativeScheduler::getSchedulePressure(const Region &R,
Range &&Schedule) const {
- auto const BBEnd = R.Begin->getParent()->end();
+ const MachineBasicBlock *MBB = R.Begin->getParent();
+ auto const BBEnd = MBB->end();
GCNUpwardRPTracker RPTracker(*LIS, MF.getRegInfo());
if (R.End != BBEnd) {
// R.End points to the boundary instruction but the
// schedule doesn't include it
- RPTracker.reset(*R.End);
+ RPTracker.reset(*R.End, MBB);
RPTracker.recede(*R.End);
} else {
// R.End doesn't point to the boundary instruction
- RPTracker.reset(*std::prev(BBEnd));
+ RPTracker.reset(*std::prev(BBEnd), MBB);
}
for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
RPTracker.recede(*getMachineInstr(*--I));
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index dbcd820bc046d..743ba8fb06ff2 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -539,6 +539,7 @@ void GCNRPTracker::removeKilledUnitsAndDecPressure(MCRegister Reg, SlotIndex SI,
}
}
+
GCNRegPressure GCNRPTracker::constructPhysRegPressure() const {
GCNRegPressure Res;
for (unsigned U : PhysLiveRegUnits.set_bits())
@@ -617,6 +618,22 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
PhysLiveRegUnits.reset();
}
+void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
+ const LiveRegSet &VirtLiveRegsSet,
+ const BitVector &PhysLiveUnits) {
+ reset(MRInfo, VirtLiveRegsSet);
+ initPhysLiveUnits(PhysLiveUnits);
+}
+
+void GCNRPTracker::initPhysLiveUnits(const BitVector &PhysLiveUnits) {
+ if (!TrackPhysRegs)
+ return;
+ PhysLiveRegUnits = PhysLiveUnits;
+ GCNRegPressure PhysPressure = constructPhysRegPressure();
+ CurPressure += PhysPressure;
+ MaxPressure = max(MaxPressure, CurPressure);
+}
+
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
LaneBitmask GCNRPTracker::getLastUsedLanes(Register Reg, SlotIndex Pos) const {
return getLanesWithProperty(
@@ -722,7 +739,8 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// GCNDownwardRPTracker
bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
- const LiveRegSet *VirtLiveRegsCopy) {
+ const LiveRegSet *VirtLiveRegsCopy,
+ const MachineBasicBlock *SeedPhysMBB) {
MRI = &MI.getMF()->getRegInfo();
SRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
LastTrackedMI = nullptr;
@@ -732,9 +750,20 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
if (NextMI == MBBEnd)
return false;
GCNRPTracker::reset(*NextMI, VirtLiveRegsCopy, false);
+ if (SeedPhysMBB && TrackPhysRegs &&
+ MI.getMF()->getProperties().hasTracksLiveness())
+ initPhysLiveUnitsFromRegMaskPairs(SeedPhysMBB->liveins());
return true;
}
+bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
+ const LiveRegSet &VirtLiveRegs,
+ const BitVector &PhysLiveUnits) {
+ bool Result = reset(MI, &VirtLiveRegs);
+ initPhysLiveUnits(PhysLiveUnits);
+ return Result;
+}
+
bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
bool UseInternalIterator) {
assert(MRI && "call reset first");
@@ -1090,9 +1119,19 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
if (MBB.empty()) {
LiveIn = LiveOut = getVirtLiveRegs(MBBStartSlot, LIS, MRI);
RPAtMBBEnd = getVirtRegPressure(MRI, LiveIn);
+ const SIRegisterInfo *SRI =
+ static_cast<const SIRegisterInfo *>(TRI);
+ BitVector SeenUnits(SRI->getNumRegUnits());
+ for (const auto &LI : MBB.liveins())
+ if (MRI.isAllocatable(LI.PhysReg))
+ for (MCRegUnit Unit : SRI->regunits(LI.PhysReg))
+ if (!SeenUnits.test(static_cast<unsigned>(Unit))) {
+ SeenUnits.set(static_cast<unsigned>(Unit));
+ RPAtMBBEnd.inc(Unit, *SRI);
+ }
} else {
GCNDownwardRPTracker RPT(LIS, MRI);
- RPT.reset(MBB.front());
+ RPT.reset(MBB.front(), /*VirtLiveRegs=*/nullptr, &MBB);
LiveIn = RPT.getVirtLiveRegs();
@@ -1107,7 +1146,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
}
} else {
GCNUpwardRPTracker RPT(LIS, MRI);
- RPT.reset(MRI, MBBLastSlot);
+ RPT.reset(MRI, MBBLastSlot, &MBB);
LiveOut = RPT.getVirtLiveRegs();
RPAtMBBEnd = RPT.getPressure();
@@ -1178,7 +1217,8 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
const MachineInstr *MaxPressureMI = nullptr;
GCNUpwardRPTracker RPT(LIS, MRI);
for (const MachineBasicBlock &MBB : MF) {
- RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
+ RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot(),
+ &MBB);
for (const MachineInstr &MI : reverse(MBB)) {
RPT.recede(MI);
unsigned NumRegs = RPT.getMaxPressure().getNumRegs(Kind);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 9d7ec2d557eda..3091f8f85f89d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -372,6 +372,10 @@ class GCNRPTracker {
void reset(const MachineInstr &MI, const LiveRegSet *VirtLiveRegsCopy,
bool After);
+ // reset tracker and set live register set to the specified value.
+ void reset(const MachineRegisterInfo &MRInfo,
+ const LiveRegSet &VirtLiveRegsSet);
+
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
void bumpDeadDefs(ArrayRef<VRegMaskOrUnit> DeadDefs);
@@ -404,14 +408,31 @@ class GCNRPTracker {
// TrackPhysRegInTrackers are true.
void setPhysRegTracking();
- // reset tracker and set live register set to the specified value.
+ // Reset tracker with both virtual and physical live register state.
void reset(const MachineRegisterInfo &MRInfo,
- const LiveRegSet &VirtLiveRegsSet);
+ const LiveRegSet &VirtLiveRegsSet,
+ const BitVector &PhysLiveUnits);
// live regs for the current state
const decltype(VirtLiveRegs) &getVirtLiveRegs() const { return VirtLiveRegs; }
+ const BitVector &getPhysLiveRegUnits() const { return PhysLiveRegUnits; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
+ /// Seed PhysLiveRegUnits with the allocatable physical registers in a range
+ /// of RegisterMaskPair entries, updating CurPressure/MaxPressure to match.
+ template <typename RangeT>
+ void initPhysLiveUnitsFromRegMaskPairs(RangeT &&Pairs) {
+ assert(TrackPhysRegs && "physical register tracking must be enabled");
+ for (const auto &RM : Pairs)
+ if (MRI->isAllocatable(RM.PhysReg))
+ addUnitsAndIncPressure(RM.PhysReg, CurPressure);
+ MaxPressure = max(MaxPressure, CurPressure);
+ }
+
+ /// Restore PhysLiveRegUnits from a previously saved BitVector and update
+ /// CurPressure/MaxPressure accordingly.
+ void initPhysLiveUnits(const BitVector &PhysLiveUnits);
+
void clearMaxPressure() { MaxPressure.clear(); }
GCNRegPressure getPressure() const { return CurPressure; }
@@ -434,20 +455,30 @@ class GCNUpwardRPTracker : public GCNRPTracker {
using GCNRPTracker::reset;
- /// reset tracker at the specified slot index \p SI.
- void reset(const MachineRegisterInfo &MRI, SlotIndex SI) {
+ /// reset tracker at the specified slot index \p SI. If \p SeedPhysMBB is
+ /// non-null, also seed physical live-out state from that MBB's successors.
+ void reset(const MachineRegisterInfo &MRI, SlotIndex SI,
+ const MachineBasicBlock *SeedPhysMBB = nullptr) {
GCNRPTracker::reset(MRI, llvm::getVirtLiveRegs(SI, LIS, MRI));
+ if (SeedPhysMBB && TrackPhysRegs &&
+ SeedPhysMBB->getParent()->getProperties().hasTracksLiveness())
+ initPhysLiveUnitsFromRegMaskPairs(SeedPhysMBB->liveouts());
}
- /// reset tracker to the end of the \p MBB.
+ /// reset tracker to the end of the \p MBB and seed physical live-outs
+ /// from the MBB's successors.
void reset(const MachineBasicBlock &MBB) {
SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
- reset(MBB.getParent()->getRegInfo(), MBBLastSlot);
+ reset(MBB.getParent()->getRegInfo(), MBBLastSlot, &MBB);
}
/// reset tracker to the point just after \p MI (in program order).
- void reset(const MachineInstr &MI) {
- reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot());
+ /// If \p SeedPhysMBB is non-null, also seed physical live-out state from
+ /// that MBB's successors.
+ void reset(const MachineInstr &MI,
+ const MachineBasicBlock *SeedPhysMBB = nullptr) {
+ reset(MI.getMF()->getRegInfo(), LIS.getInstructionIndex(MI).getDeadSlot(),
+ SeedPhysMBB);
}
/// Move to the state of RP just before the \p MI . If \p UseInternalIterator
@@ -496,8 +527,17 @@ class GCNDownwardRPTracker : public GCNRPTracker {
/// Reset tracker to the point before the \p MI
/// filling \p VirtLiveRegs upon this point using LIS.
+ /// If \p SeedPhysMBB is non-null, also seed physical live-in state from
+ /// that MBB's live-in list.
+ /// \returns false if the block is empty except for debug values.
+ bool reset(const MachineInstr &MI, const LiveRegSet *VirtLiveRegs = nullptr,
+ const MachineBasicBlock *SeedPhysMBB = nullptr);
+
+ /// Reset tracker to the point before \p MI, restoring both virtual and
+ /// physical register state from saved snapshots.
/// \p returns false if block is empty except debug values.
- bool reset(const MachineInstr &MI, const LiveRegSet *VirtLiveRegs = nullptr);
+ bool reset(const MachineInstr &MI, const LiveRegSet &VirtLiveRegs,
+ const BitVector &PhysLiveUnits);
/// Move to the state right before the next MI or after the end of MBB.
/// \p returns false if reached end of the block.
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index e5c34930bb29c..b103b9daa8969 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1002,8 +1002,12 @@ GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
if (Regions[RegionIdx].first == Regions[RegionIdx].second)
return llvm::getVirtRegPressure(MRI, VirtLiveIns[RegionIdx]);
GCNDownwardRPTracker RPTracker(*LIS, MF.getRegInfo());
- RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
- &VirtLiveIns[RegionIdx]);
+ if (!PhysLiveIns.empty())
+ RPTracker.reset(*Regions[RegionIdx].first, VirtLiveIns[RegionIdx],
+ PhysLiveIns[RegionIdx]);
+ else
+ RPTracker.reset(*Regions[RegionIdx].first, &VirtLiveIns[RegionIdx]);
+ RPTracker.advance(Regions[RegionIdx].second);
return RPTracker.moveMaxPressure();
}
@@ -1048,7 +1052,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
if (LiveInIt != MBBVirtLiveIns.end()) {
auto LiveIn = std::move(LiveInIt->second);
- RPTracker.reset(*MBB->begin(), &LiveIn);
+ RPTracker.reset(*MBB->begin(), &LiveIn, MBB);
MBBVirtLiveIns.erase(LiveInIt);
} else {
I = Rgn.first;
@@ -1056,7 +1060,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
#ifdef EXPENSIVE_CHECKS
assert(isEqual(getVirtLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
- RPTracker.reset(*I, &LRS);
+ RPTracker.reset(*I, &LRS, MBB);
}
for (;;) {
@@ -1064,10 +1068,12 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
if (Regions[CurRegion].first == I || NonDbgMI == I) {
VirtLiveIns[CurRegion] = RPTracker.getVirtLiveRegs();
+ PhysLiveIns[CurRegion] = RPTracker.getPhysLiveRegUnits();
RPTracker.clearMaxPressure();
}
if (Regions[CurRegion].second == I) {
+ PhysLiveOuts[CurRegion] = RPTracker.getPhysLiveRegUnits();
Pressure[CurRegion] = RPTracker.moveMaxPressure();
if (CurRegion-- == RegionIdx)
break;
@@ -1135,6 +1141,8 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
// MachineScheduler after all regions have been recorded by
// GCNScheduleDAGMILive::schedule().
VirtLiveIns.resize(Regions.size());
+ PhysLiveIns.resize(Regions.size());
+ PhysLiveOuts.resize(Regions.size());
Pressure.resize(Regions.size());
RegionsWithHighRP.resize(Regions.size());
RegionsWithExcessRP.resize(Regions.size());
@@ -1181,14 +1189,14 @@ void GCNScheduleDAGMILive::runSchedStages() {
if (GCNTrackers) {
GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
- GCNRPTracker::LiveRegSet *RegionLiveIns =
- &VirtLiveIns[Stage->getRegionIdx()];
+ unsigned Idx = Stage->getRegionIdx();
+ GCNRPTracker::LiveRegSet *RegionLiveIns = &VirtLiveIns[Idx];
reinterpret_cast<GCNRPTracker *>(DownwardTracker)
- ->reset(MRI, *RegionLiveIns);
+ ->reset(MRI, *RegionLiveIns, PhysLiveIns[Idx]);
reinterpret_cast<GCNRPTracker *>(UpwardTracker)
- ->reset(MRI, RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(
- Stage->getRegionIdx()));
+ ->reset(MRI, RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(Idx),
+ PhysLiveOuts[Idx]);
}
ScheduleDAGMILive::schedule();
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 968f81b60e25c..dc4c91e3bc005 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -275,6 +275,12 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// Region live-in cache.
SmallVector<GCNRPTracker::LiveRegSet, 32> VirtLiveIns;
+ // Per-region physical register live-in cache (register unit BitVectors).
+ SmallVector<BitVector, 32> PhysLiveIns;
+
+ // Per-region physical register live-out cache (register unit BitVectors).
+ SmallVector<BitVector, 32> PhysLiveOuts;
+
// Region pressure cache.
SmallVector<GCNRegPressure, 32> Pressure;
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 4e3a55d5a79f7..2b335763dea32 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -290,16 +290,17 @@ bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
continue;
if (!RPT.getNext().isValid())
- RPT.reset(MI);
+ RPT.reset(MI, /*VirtLiveRegs=*/nullptr, &MBB);
else { // Advance the state to the current MI.
RPT.advance(MachineBasicBlock::const_iterator(MI));
RPT.advanceBeforeNext();
}
const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getVirtLiveRegs());
+ const BitVector PhysRegsCopy(RPT.getPhysLiveRegUnits());
RegUse Defs, Uses;
if (!processRegUses(MI, Defs, Uses, RPT)) {
- RPT.reset(MI, &LiveRegsCopy);
+ RPT.reset(MI, LiveRegsCopy, PhysRegsCopy);
continue;
}
@@ -323,7 +324,7 @@ bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
++Length;
}
if (Length < 2) {
- RPT.reset(MI, &LiveRegsCopy);
+ RPT.reset(MI, LiveRegsCopy, PhysRegsCopy);
continue;
}
@@ -391,7 +392,7 @@ bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
}
// Restore the state after processing the end of the bundle.
- RPT.reset(MI, &LiveRegsCopy);
+ RPT.reset(MI, LiveRegsCopy, PhysRegsCopy);
if (!Kill)
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 496279f330063..2bec7e284dc9c 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -11144,8 +11144,6 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-GCNTRACKERS-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; GFX908-GCNTRACKERS-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-GCNTRACKERS-NEXT: $vgpr8 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: $vgpr9 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -11164,73 +11162,75 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
- ; GFX908-GCNTRACKERS-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
+ ; GFX908-GCNTRACKERS-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: $vgpr8 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: $vgpr9 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]]
; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -11445,8 +11445,6 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
; GFX908-GCNTRACKERS-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 52, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4)
; GFX908-GCNTRACKERS-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sreg_64 = S_MOV_B32 0
- ; GFX908-GCNTRACKERS-NEXT: $vgpr8 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: $vgpr9 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -11507,19 +11505,21 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: $vgpr8 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: $vgpr9 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-GCNTRACKERS-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
index bb5f984c60a3e..bc6f1e1888bbc 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-limits.mir
@@ -32,6 +32,10 @@ body: |
; RPD-LABEL: name: nonallocatable_physreg
; RPD: bb.0:
; RPD: SGPR VGPR
+ ;
+ ; Initial pressure includes $sgpr8_sgpr9 live-in (2 SGPR units).
+ ;
+ ; RPD: 2 0
; RPD: 2 1
; RPD: 2 1 S_NOP 0, implicit-def $scc
; RPD: 2 1
@@ -39,6 +43,10 @@ body: |
; RPD-NOPHYS-LABEL: name: nonallocatable_physreg
; RPD-NOPHYS: bb.0:
; RPD-NOPHYS: SGPR VGPR
+ ;
+ ; Without physreg tracking, no live-in physical register pressure.
+ ;
+ ; RPD-NOPHYS: 0 0
; RPD-NOPHYS: 2 1
; RPD-NOPHYS: 2 1 S_NOP 0, implicit-def $scc
; RPD-NOPHYS: 2 1
@@ -92,12 +100,14 @@ body: |
; RPD: bb.0:
; RPD: SGPR VGPR
;
+ ; Initial pressure includes $sgpr8_sgpr9 live-in (2 SGPR units).
; Downward tracker: the INLINEASM def adds $sgpr10_sgpr11 (2 units =
; 2 SGPRs). At the COPY use of $sgpr11, the corresponding reg-unit
; is not found in the cached live ranges, so it conservatively keeps
; the unit live. The virtual register def leads to the increase in
; pressure from 4 to 5.
;
+ ; RPD: 2 0
; RPD: 2 1
; RPD: 4 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
; RPD: 4 1
@@ -107,9 +117,10 @@ body: |
; RPD-NOPHYS: bb.0:
; RPD-NOPHYS: SGPR VGPR
;
- ; Without physreg tracking, no physical register pressure is counted,
- ; so the INLINEASM shows no changes in pressure.
+ ; Without physreg tracking, no live-in physical register pressure
+ ; and no physical register pressure changes at instructions.
;
+ ; RPD-NOPHYS: 0 0
; RPD-NOPHYS: 2 1
; RPD-NOPHYS: 2 1 INLINEASM &"s_mov_b64 $0, 0"{{.*}} implicit-def $sgpr10_sgpr11
; RPD-NOPHYS: 2 1
@@ -126,3 +137,103 @@ body: |
GLOBAL_STORE_DWORD_SADDR %2, %4, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
S_ENDPGM 0
...
+
+# Live-out seeding: bb.0 defines $sgpr10 via INLINEASM and branches to bb.1.
+# bb.1 has $sgpr10 as a live-in. The upward tracker for bb.0 is initialized
+# with bb.0's live-outs (= successor bb.1's live-ins). Without live-out
+# seeding, the upward tracker treats the INLINEASM def as dead and never
+# accounts for $sgpr10's pressure in the region below the def.
+---
+name: physreg_liveout
+tracksRegLiveness: true
+machineFunctionInfo:
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; RPU-LABEL: name: physreg_liveout
+ ; RPU: bb.0:
+ ; RPU: SGPR VGPR
+ ;
+ ; Top of bb.0: after receding past all instructions, $sgpr8_sgpr9 remains
+ ; live from the COPY use. SGPR=2 ($sgpr8_sgpr9 = 2 reg-units).
+ ;
+ ; RPU: 2 0
+ ; RPU: 2 0 %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ ; RPU: 2 0
+ ; RPU: 2 0 %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM
+ ; RPU: 2 0
+ ;
+ ; Below the INLINEASM def, $sgpr10 is live (from live-out seeding),
+ ; so SGPR=3 (%1 = 2 units + $sgpr10 = 1 unit).
+ ;
+ ; RPU: 3 0 INLINEASM &"s_mov_b32 $0, 0"{{.*}} implicit-def $sgpr10
+ ; RPU: 3 0
+ ; RPU: 3 0 S_BRANCH %bb.1
+ ; RPU: 3 0
+ ;
+ ; RPU-NOPHYS-LABEL: name: physreg_liveout
+ ; RPU-NOPHYS: bb.0:
+ ; RPU-NOPHYS: SGPR VGPR
+ ;
+ ; Without physreg tracking, no live-out seeding: the INLINEASM def is dead
+ ; from the upward tracker's perspective, so $sgpr10 is never counted.
+ ; $sgpr8_sgpr9 is also not tracked, so top pressure is 0.
+ ;
+ ; RPU-NOPHYS: 0 0
+ ; RPU-NOPHYS: 2 0 %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ ; RPU-NOPHYS: 2 0
+ ; RPU-NOPHYS: 2 0 %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM
+ ; RPU-NOPHYS: 2 0
+ ; RPU-NOPHYS: 2 0 INLINEASM &"s_mov_b32 $0, 0"{{.*}} implicit-def $sgpr10
+ ; RPU-NOPHYS: 2 0
+ ; RPU-NOPHYS: 2 0 S_BRANCH %bb.1
+ ; RPU-NOPHYS: 2 0
+ ;
+ ; RPD-LABEL: name: physreg_liveout
+ ; RPD: bb.0:
+ ; RPD: SGPR VGPR
+ ;
+ ; Downward tracker: initial pressure includes $sgpr8_sgpr9 live-in (2).
+ ; INLINEASM adds $sgpr10 (1 unit), bringing pressure to 3.
+ ;
+ ; RPD: 2 0
+ ; RPD: 4 0 %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ ; RPD: 2 0
+ ; RPD: 4 0 %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM
+ ; RPD: 2 0
+ ; RPD: 3 0 INLINEASM &"s_mov_b32 $0, 0"{{.*}} implicit-def $sgpr10
+ ; RPD: 3 0
+ ; RPD: 3 0 S_BRANCH %bb.1
+ ; RPD: 3 0
+ ;
+ ; RPD-NOPHYS-LABEL: name: physreg_liveout
+ ; RPD-NOPHYS: bb.0:
+ ; RPD-NOPHYS: SGPR VGPR
+ ;
+ ; Without physreg tracking: no live-in pressure, INLINEASM has no effect.
+ ;
+ ; RPD-NOPHYS: 0 0
+ ; RPD-NOPHYS: 2 0 %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ ; RPD-NOPHYS: 2 0
+ ; RPD-NOPHYS: 4 0 %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM
+ ; RPD-NOPHYS: 2 0
+ ; RPD-NOPHYS: 2 0 INLINEASM &"s_mov_b32 $0, 0"{{.*}} implicit-def $sgpr10
+ ; RPD-NOPHYS: 2 0
+ ; RPD-NOPHYS: 2 0 S_BRANCH %bb.1
+ ; RPD-NOPHYS: 2 0
+ bb.0:
+ liveins: $sgpr8_sgpr9
+ successors: %bb.1
+
+ %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ INLINEASM &"s_mov_b32 $0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr10
+ S_BRANCH %bb.1
+
+ bb.1:
+ liveins: $sgpr10
+
+ %2:vgpr_32 = COPY $sgpr10
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %3, %2, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/sched-physreg-liveins.mir b/llvm/test/CodeGen/AMDGPU/sched-physreg-liveins.mir
new file mode 100644
index 0000000000000..95483ab85c5ff
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-physreg-liveins.mir
@@ -0,0 +1,68 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler \
+# RUN: -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler \
+# RUN: -o /dev/null %s 2>&1 | FileCheck --check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler \
+# RUN: -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 \
+# RUN: -debug-only=machine-scheduler -o /dev/null %s 2>&1 \
+# RUN: | FileCheck --check-prefix=GCN-NOPHYS %s
+
+# Test that physical register live-ins from MBB liveins are correctly
+# included in per-region pressure when using GCN trackers in the scheduler.
+#
+# The function has two scheduling regions separated by SCHED_BARRIER.
+# $sgpr10 is live-in to the MBB but only used in the second region.
+# With physical register tracking, $sgpr10 should contribute 1 SGPR
+# of pressure in the first region (live-through). Without physical
+# tracking, it should not appear.
+
+--- |
+ define amdgpu_kernel void @physreg_livein_across_regions() #0 { ret void }
+ attributes #0 = { "target-cpu"="gfx900" }
+...
+
+# GCN-LABEL: physreg_livein_across_regions
+#
+# Region 0 (bottom of block, processed first by scheduler):
+# $sgpr10 is used here via COPY. With phys tracking, $sgpr10 contributes
+# 1 extra SGPR compared to NOPHYS.
+# GCN: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 3
+# GCN: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 3
+#
+# Region 1 (top of block, processed second):
+# $sgpr10 is live-through (MBB live-in, not used until region 0).
+# Physical live-in tracking captures it in per-region pressure.
+# GCN: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 5
+# GCN: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 5
+
+# GCN-NOPHYS-LABEL: physreg_livein_across_regions
+#
+# Region 0: $sgpr10 not tracked, only virtual regs contribute.
+# GCN-NOPHYS: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 2
+# GCN-NOPHYS: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 2
+#
+# Region 1: No physical register pressure.
+# GCN-NOPHYS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 4
+# GCN-NOPHYS: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 4
+
+---
+name: physreg_livein_across_regions
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5, $sgpr10
+
+ ; Region 1: virtual reg defs only, $sgpr10 is live-through.
+ %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ SCHED_BARRIER 0
+
+ ; Region 0: uses $sgpr10.
+ %2:vgpr_32 = COPY $sgpr10
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %3, %2, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_ENDPGM 0
+...
>From 40b3190112ca2cfd2c7c8da6681220e4d19c5273 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 12 Jun 2026 17:01:19 -0500
Subject: [PATCH 23/25] Fixed rebase.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 28 +++++++-------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 41 +++++++++++----------
2 files changed, 37 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 743ba8fb06ff2..790d4e86f1947 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -1112,13 +1112,13 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
SlotIndex MBBStartSlot = LIS.getSlotIndexes()->getMBBStartIdx(&MBB);
SlotIndex MBBLastSlot = LIS.getSlotIndexes()->getMBBLastIdx(&MBB);
- GCNRPTracker::LiveRegSet LiveIn, LiveOut;
+ GCNRPTracker::LiveRegSet VirtLiveIn, VirtLiveOut;
GCNRegPressure RPAtMBBEnd;
if (UseDownwardTracker) {
if (MBB.empty()) {
- LiveIn = LiveOut = getVirtLiveRegs(MBBStartSlot, LIS, MRI);
- RPAtMBBEnd = getVirtRegPressure(MRI, LiveIn);
+ VirtLiveIn = VirtLiveOut = getVirtLiveRegs(MBBStartSlot, LIS, MRI);
+ RPAtMBBEnd = getVirtRegPressure(MRI, VirtLiveIn);
const SIRegisterInfo *SRI =
static_cast<const SIRegisterInfo *>(TRI);
BitVector SeenUnits(SRI->getNumRegUnits());
@@ -1133,7 +1133,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
GCNDownwardRPTracker RPT(LIS, MRI);
RPT.reset(MBB.front(), /*VirtLiveRegs=*/nullptr, &MBB);
- LiveIn = RPT.getVirtLiveRegs();
+ VirtLiveIn = RPT.getVirtLiveRegs();
while (!RPT.advanceBeforeNext()) {
GCNRegPressure RPBeforeMI = RPT.getPressure();
@@ -1141,14 +1141,14 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RP.emplace_back(RPBeforeMI, RPT.getPressure());
}
- LiveOut = RPT.getVirtLiveRegs();
+ VirtLiveOut = RPT.getVirtLiveRegs();
RPAtMBBEnd = RPT.getPressure();
}
} else {
GCNUpwardRPTracker RPT(LIS, MRI);
RPT.reset(MRI, MBBLastSlot, &MBB);
- LiveOut = RPT.getVirtLiveRegs();
+ VirtLiveOut = RPT.getVirtLiveRegs();
RPAtMBBEnd = RPT.getPressure();
for (auto &MI : reverse(MBB)) {
@@ -1158,12 +1158,13 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RP.emplace_back(RPT.getPressure(), RPT.getMaxPressure());
}
- LiveIn = RPT.getVirtLiveRegs();
+ VirtLiveIn = RPT.getVirtLiveRegs();
}
- OS << PFX " Live-in: " << llvm::print(LiveIn, MRI);
+ OS << PFX " Live-in: " << llvm::print(VirtLiveIn, MRI);
if (!UseDownwardTracker)
- ReportLISMismatchIfAny(LiveIn, getVirtLiveRegs(MBBStartSlot, LIS, MRI));
+ ReportLISMismatchIfAny(VirtLiveIn,
+ getVirtLiveRegs(MBBStartSlot, LIS, MRI));
OS << PFX " SGPR VGPR\n";
int I = 0;
@@ -1179,13 +1180,14 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
}
OS << printRP(RPAtMBBEnd) << '\n';
- OS << PFX " Live-out:" << llvm::print(LiveOut, MRI);
+ OS << PFX " Live-out:" << llvm::print(VirtLiveOut, MRI);
if (UseDownwardTracker)
- ReportLISMismatchIfAny(LiveOut, getVirtLiveRegs(MBBLastSlot, LIS, MRI));
+ ReportLISMismatchIfAny(VirtLiveOut,
+ getVirtLiveRegs(MBBLastSlot, LIS, MRI));
GCNRPTracker::LiveRegSet LiveThrough;
- for (auto [Reg, Mask] : LiveIn) {
- LaneBitmask MaskIntersection = Mask & LiveOut.lookup(Reg);
+ for (auto [Reg, Mask] : VirtLiveIn) {
+ LaneBitmask MaskIntersection = Mask & VirtLiveOut.lookup(Reg);
if (MaskIntersection.any()) {
LaneBitmask LTMask = getRegLiveThroughMask(
MRI, LIS, Reg, MBBStartSlot, MBBLastSlot, MaskIntersection);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index b103b9daa8969..8c03dd076f554 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1047,20 +1047,20 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
--CurRegion;
auto I = MBB->begin();
- auto LiveInIt = MBBVirtLiveIns.find(MBB);
+ auto VirtLiveInIt = MBBVirtLiveIns.find(MBB);
auto &Rgn = Regions[CurRegion];
auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
- if (LiveInIt != MBBVirtLiveIns.end()) {
- auto LiveIn = std::move(LiveInIt->second);
- RPTracker.reset(*MBB->begin(), &LiveIn, MBB);
- MBBVirtLiveIns.erase(LiveInIt);
+ if (VirtLiveInIt != MBBVirtLiveIns.end()) {
+ auto VirtLiveIn = std::move(VirtLiveInIt->second);
+ RPTracker.reset(*MBB->begin(), &VirtLiveIn, MBB);
+ MBBVirtLiveIns.erase(VirtLiveInIt);
} else {
I = Rgn.first;
- auto LRS = BBVirtLiveInMap.lookup(NonDbgMI);
+ auto VirtLiveInSet = BBVirtLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
- assert(isEqual(getVirtLiveRegsBefore(*NonDbgMI, *LIS), LRS));
+ assert(isEqual(getVirtLiveRegsBefore(*NonDbgMI, *LIS), VirtLiveInSet));
#endif
- RPTracker.reset(*I, &LRS, MBB);
+ RPTracker.reset(*I, &VirtLiveInSet, MBB);
}
for (;;) {
@@ -1190,10 +1190,10 @@ void GCNScheduleDAGMILive::runSchedStages() {
GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
unsigned Idx = Stage->getRegionIdx();
- GCNRPTracker::LiveRegSet *RegionLiveIns = &VirtLiveIns[Idx];
+ GCNRPTracker::LiveRegSet *VirtRegionLiveIns = &VirtLiveIns[Idx];
reinterpret_cast<GCNRPTracker *>(DownwardTracker)
- ->reset(MRI, *RegionLiveIns, PhysLiveIns[Idx]);
+ ->reset(MRI, *VirtRegionLiveIns, PhysLiveIns[Idx]);
reinterpret_cast<GCNRPTracker *>(UpwardTracker)
->reset(MRI, RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(Idx),
PhysLiveOuts[Idx]);
@@ -2744,11 +2744,12 @@ bool RewriteMFMAFormStage::rewrite(
// Bulk update the LIS.
DAG.LIS->reanalyze(DAG.MF);
// Liveins may have been modified for cross RC copies
- RegionPressureMap LiveInUpdater(&DAG, false);
- LiveInUpdater.buildVirtLiveRegMap();
+ RegionPressureMap VirtLiveInUpdater(&DAG, false);
+ VirtLiveInUpdater.buildVirtLiveRegMap();
for (unsigned Region = 0; Region < DAG.Regions.size(); Region++)
- DAG.VirtLiveIns[Region] = LiveInUpdater.getVirtLiveRegsForRegionIdx(Region);
+ DAG.VirtLiveIns[Region] =
+ VirtLiveInUpdater.getVirtLiveRegsForRegionIdx(Region);
DAG.Pressure[RegionIdx] = DAG.getRealRegPressure(RegionIdx);
@@ -2873,11 +2874,13 @@ PreRARematStage::RematReg::RematReg(
// Mark regions in which the rematerializable register is live.
Register Reg = getReg();
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
- auto LiveInIt = DAG.VirtLiveIns[I].find(Reg);
- if (LiveInIt != DAG.VirtLiveIns[I].end())
+ auto VirtLiveInIt = DAG.VirtLiveIns[I].find(Reg);
+ if (VirtLiveInIt != DAG.VirtLiveIns[I].end())
LiveIn.set(I);
- const auto &LiveOuts = DAG.RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(I);
- if (auto LiveOutIt = LiveOuts.find(Reg); LiveOutIt != LiveOuts.end())
+ const auto &VirtLiveOuts =
+ DAG.RegionVirtLiveOuts.getVirtLiveRegsForRegionIdx(I);
+ if (auto VirtLiveOutIt = VirtLiveOuts.find(Reg);
+ VirtLiveOutIt != VirtLiveOuts.end())
LiveOut.set(I);
}
Live |= LiveIn;
@@ -3023,8 +3026,8 @@ MachineInstr *PreRARematStage::ScoredRemat::rematerialize(
if (LI.hasSubRanges() && MO.getSubReg())
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
- LaneBitmask LiveInMask = DAG.VirtLiveIns[Remat->UseRegion].at(UseReg);
- LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
+ LaneBitmask VirtLiveInMask = DAG.VirtLiveIns[Remat->UseRegion].at(UseReg);
+ LaneBitmask UncoveredLanes = LM & ~(VirtLiveInMask & LM);
// If this register has lanes not covered by the VirtLiveIns, be sure they
// do not map to any subrange. ref:
// machine-scheduler-sink-trivial-remats.mir::omitted_subrange
>From f0524b83a46dbd071db65c646de6fe6de94d0d7e Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 12 Jun 2026 23:53:43 -0500
Subject: [PATCH 24/25] Added a test to capture physical register liveouts.
---
.../CodeGen/AMDGPU/sched-physreg-liveouts.mir | 84 +++++++++++++++++++
1 file changed, 84 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/sched-physreg-liveouts.mir
diff --git a/llvm/test/CodeGen/AMDGPU/sched-physreg-liveouts.mir b/llvm/test/CodeGen/AMDGPU/sched-physreg-liveouts.mir
new file mode 100644
index 0000000000000..d8db3ff4313e6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-physreg-liveouts.mir
@@ -0,0 +1,84 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler \
+# RUN: -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler \
+# RUN: -o /dev/null %s 2>&1 | FileCheck --check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler \
+# RUN: -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 \
+# RUN: -debug-only=machine-scheduler -o /dev/null %s 2>&1 \
+# RUN: | FileCheck --check-prefix=GCN-NOPHYS %s
+
+# Test that physical register live-outs (derived from successor block liveins)
+# are correctly included in per-region pressure when using GCN trackers.
+#
+# bb.0 defines $sgpr10 in the first scheduling region and branches to bb.1
+# which has $sgpr10 as a live-in. This makes $sgpr10 a live-out of bb.0.
+# The second region in bb.0 does not touch $sgpr10, so it is live-through.
+# With physical register tracking, $sgpr10 should contribute 1 SGPR of
+# pressure in that second region. Without physical tracking, it should not.
+
+--- |
+ define amdgpu_kernel void @physreg_liveout_across_regions() #0 { ret void }
+ attributes #0 = { "target-cpu"="gfx900" }
+...
+
+# GCN-LABEL: physreg_liveout_across_regions
+#
+# Region 0 (bottom of bb.0, processed first by scheduler):
+# $sgpr10 is live-through here (defined above, live-out to bb.1).
+# With phys tracking it contributes +1 SGPR compared to NOPHYS.
+# GCN: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5
+# GCN: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5
+#
+# Region 1 (top of bb.0, processed second):
+# $sgpr4_sgpr5 live-in (+2) and $sgpr10 def (+1) add physical pressure.
+# GCN: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 4
+# GCN: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 4
+#
+# bb.1: $sgpr10 is a physical live-in here.
+# GCN: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 1
+# GCN: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 1
+
+# GCN-NOPHYS-LABEL: physreg_liveout_across_regions
+#
+# Region 0: $sgpr10 live-out not tracked, only virtual regs contribute.
+# GCN-NOPHYS: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4
+# GCN-NOPHYS: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4
+#
+# Region 1: No physical register pressure.
+# GCN-NOPHYS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 2
+# GCN-NOPHYS: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 2
+#
+# bb.1: $sgpr10 live-in not tracked.
+# GCN-NOPHYS: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 0
+# GCN-NOPHYS: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 0
+
+---
+name: physreg_liveout_across_regions
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ bb.0:
+ liveins: $sgpr4_sgpr5
+ successors: %bb.1
+
+ ; Region 1: defines $sgpr10, which is live-out to bb.1.
+ %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
+ $sgpr10 = S_MOV_B32 42
+ SCHED_BARRIER 0
+
+ ; Region 0: $sgpr10 is live-through (defined above, live-out to bb.1).
+ %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ GLOBAL_STORE_DWORD_SADDR %2, %2, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_BRANCH %bb.1
+
+ bb.1:
+ liveins: $sgpr10
+
+ %3:vgpr_32 = COPY $sgpr10
+ %4:vreg_64 = IMPLICIT_DEF
+ GLOBAL_STORE_DWORD %4, %3, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_ENDPGM 0
+...
>From 7063751c7885dd643493d3672dfb1db43e081ac5 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Sat, 13 Jun 2026 00:24:38 -0500
Subject: [PATCH 25/25] clang-format.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 12 +++++------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 21 ++++++++++----------
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 22 ++++++++++-----------
3 files changed, 26 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 790d4e86f1947..06cc8e7e67441 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -539,7 +539,6 @@ void GCNRPTracker::removeKilledUnitsAndDecPressure(MCRegister Reg, SlotIndex SI,
}
}
-
GCNRegPressure GCNRPTracker::constructPhysRegPressure() const {
GCNRegPressure Res;
for (unsigned U : PhysLiveRegUnits.set_bits())
@@ -564,10 +563,10 @@ LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
return LiveMask;
}
-GCNRPTracker::LiveRegSet llvm::getVirtLiveRegs(SlotIndex SI,
- const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI,
- GCNRegPressure::RegKind RegKind) {
+GCNRPTracker::LiveRegSet
+llvm::getVirtLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind) {
GCNRPTracker::LiveRegSet VirtLiveRegs;
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
auto Reg = Register::index2VirtReg(I);
@@ -1119,8 +1118,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
if (MBB.empty()) {
VirtLiveIn = VirtLiveOut = getVirtLiveRegs(MBBStartSlot, LIS, MRI);
RPAtMBBEnd = getVirtRegPressure(MRI, VirtLiveIn);
- const SIRegisterInfo *SRI =
- static_cast<const SIRegisterInfo *>(TRI);
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
BitVector SeenUnits(SRI->getNumRegUnits());
for (const auto &LI : MBB.liveins())
if (MRI.isAllocatable(LI.PhysReg))
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 3091f8f85f89d..a8d8d83eb8250 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -410,8 +410,7 @@ class GCNRPTracker {
// Reset tracker with both virtual and physical live register state.
void reset(const MachineRegisterInfo &MRInfo,
- const LiveRegSet &VirtLiveRegsSet,
- const BitVector &PhysLiveUnits);
+ const LiveRegSet &VirtLiveRegsSet, const BitVector &PhysLiveUnits);
// live regs for the current state
const decltype(VirtLiveRegs) &getVirtLiveRegs() const { return VirtLiveRegs; }
@@ -442,8 +441,8 @@ class GCNRPTracker {
GCNRPTracker::LiveRegSet
getVirtLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI,
- GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);
+ const MachineRegisterInfo &MRI,
+ GCNRegPressure::RegKind RegKind = GCNRegPressure::TOTAL_KINDS);
////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker
@@ -603,7 +602,7 @@ LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
/// Note: there is no entry in the map for instructions with empty live reg set
/// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R))
template <typename Range>
-DenseMap<MachineInstr*, GCNRPTracker::LiveRegSet>
+DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
getVirtLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
std::vector<SlotIndex> Indexes;
Indexes.reserve(llvm::size(R));
@@ -642,20 +641,20 @@ getVirtLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) {
}
inline GCNRPTracker::LiveRegSet getVirtLiveRegsAfter(const MachineInstr &MI,
- const LiveIntervals &LIS) {
+ const LiveIntervals &LIS) {
return getVirtLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
- MI.getMF()->getRegInfo());
+ MI.getMF()->getRegInfo());
}
-inline GCNRPTracker::LiveRegSet getVirtLiveRegsBefore(const MachineInstr &MI,
- const LiveIntervals &LIS) {
+inline GCNRPTracker::LiveRegSet
+getVirtLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS) {
return getVirtLiveRegs(LIS.getInstructionIndex(MI).getBaseIndex(), LIS,
- MI.getMF()->getRegInfo());
+ MI.getMF()->getRegInfo());
}
template <typename Range>
GCNRegPressure getVirtRegPressure(const MachineRegisterInfo &MRI,
- Range &&LiveRegs) {
+ Range &&LiveRegs) {
GCNRegPressure Res;
for (const auto &RM : LiveRegs)
Res.inc(RM.first, LaneBitmask::getNone(), RM.second, MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8c03dd076f554..6e6c7d91e3d47 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1123,8 +1123,8 @@ GCNScheduleDAGMILive::getRegionVirtLiveOutMap() const {
void RegionPressureMap::buildVirtLiveRegMap() {
IdxToInstruction.clear();
- RegionVirtLiveRegMap =
- IsLiveOut ? DAG->getRegionVirtLiveOutMap() : DAG->getRegionVirtLiveInMap();
+ RegionVirtLiveRegMap = IsLiveOut ? DAG->getRegionVirtLiveOutMap()
+ : DAG->getRegionVirtLiveInMap();
for (unsigned I = 0; I < DAG->Regions.size(); I++) {
auto &[RegionBegin, RegionEnd] = DAG->Regions[I];
// Skip empty regions.
@@ -1425,9 +1425,9 @@ Printable PreRARematStage::ScoredRemat::print() const {
#endif
bool PreRARematStage::initGCNSchedStage() {
- // FIXME: This pass will invalidate cached BBVirtLiveInMap and MBBVirtLiveIns for
- // regions inbetween the defs and region we sinked the def to. Will need to be
- // fixed if there is another pass after this pass.
+ // FIXME: This pass will invalidate cached BBVirtLiveInMap and MBBVirtLiveIns
+ // for regions in between the defs and the region we sank the def to. Will
+ // need to be fixed if there is another pass after this pass.
assert(!S.hasNextStage());
if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
@@ -1732,12 +1732,12 @@ bool GCNSchedStage::initGCNRegion() {
PressureBefore = DAG.Pressure[RegionIdx];
- LLVM_DEBUG(
- dbgs() << "Pressure before scheduling:\nRegion live-ins:"
- << print(DAG.VirtLiveIns[RegionIdx], DAG.MRI)
- << "Region live-in pressure: "
- << print(llvm::getVirtRegPressure(DAG.MRI, DAG.VirtLiveIns[RegionIdx]))
- << "Region register pressure: " << print(PressureBefore));
+ LLVM_DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"
+ << print(DAG.VirtLiveIns[RegionIdx], DAG.MRI)
+ << "Region live-in pressure: "
+ << print(llvm::getVirtRegPressure(
+ DAG.MRI, DAG.VirtLiveIns[RegionIdx]))
+ << "Region register pressure: " << print(PressureBefore));
S.HasHighPressure = false;
S.KnownExcessRP = isRegionWithExcessRP();
More information about the llvm-branch-commits
mailing list