[llvm-branch-commits] [llvm] [AMDGPU] Physical register tracking in GCN trackers. (PR #184275)
Dhruva Chakrabarti via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Mar 31 02:26:20 PDT 2026
https://github.com/dhruvachak updated https://github.com/llvm/llvm-project/pull/184275
>From 0cb10d5ae43e8decc6b9263a9437cc54b21ca234 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 27 Feb 2026 13:13:15 -0600
Subject: [PATCH 1/7] [AMDGPU] Physical register tracking in GCN trackers.
Previously, the GCN trackers only monitored virtual registers, leading to
inaccurate register pressure estimates and sub-optimal scheduling decisions
when physical registers were present. This patch adds support for tracking
physical registers in GCN trackers. Virtual and physical register tracking
are maintained separately: in addition to the virtual LiveRegs, a set of
physical LiveRegs is now maintained. The physical-register flow closely
follows the existing virtual-register flow in the trackers' recede, advance,
and pressure-increment methods.
Tracking physical registers leads to better register allocation, eliminates
allocation failures, and gives more accurate pressure estimates. Existing
tests have been updated to reflect the above. A new test,
schedule-gcn-physreg-pressure.ll, has been added to validate physical
register tracking across multiple scenarios.
Assisted-by: Cursor
---
llvm/include/llvm/CodeGen/RegisterPressure.h | 14 +
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 9 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 267 ++-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 102 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 28 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 6 +-
.../lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 2 +-
.../machine-scheduler-sink-trivial-remats.mir | 94 +-
.../AMDGPU/materialize-frame-index-sgpr.ll | 1558 +++++++++++++++++
.../schedule-amdgpu-tracker-physreg-crash.ll | 12 +-
.../AMDGPU/schedule-amdgpu-tracker-physreg.ll | 32 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 513 ++++++
12 files changed, 2539 insertions(+), 98 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index 7485be6dcb351..01a944f386014 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -293,6 +293,20 @@ class LiveRegSet {
}
public:
+ LiveRegSet() = default;
+
+ // Copy assignment operator - copies live register contents.
+ // Note: Both LiveRegSets must have been initialized with init() first.
+ LiveRegSet &operator=(const LiveRegSet &Other) {
+ if (this != &Other) {
+ NumRegUnits = Other.NumRegUnits;
+ Regs.clear();
+ for (const IndexMaskPair &Pair : Other.Regs)
+ Regs.insert(Pair);
+ }
+ return *this;
+ }
+
LLVM_ABI void clear();
LLVM_ABI void init(const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index dff153cebdd4c..ac5a8c7802ff5 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -238,11 +238,8 @@ class SchedStrategyStub : public MachineSchedStrategy {
GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
StrategyKind S)
- : BaseClass(C, std::make_unique<SchedStrategyStub>())
- , Context(C)
- , Strategy(S)
- , UPTracker(*LIS) {
-}
+ : BaseClass(C, std::make_unique<SchedStrategyStub>()), Context(C),
+ Strategy(S), UPTracker(*LIS, Context->MF->getRegInfo()) {}
// returns max pressure for a region
GCNRegPressure
@@ -281,7 +278,7 @@ template <typename Range> GCNRegPressure
GCNIterativeScheduler::getSchedulePressure(const Region &R,
Range &&Schedule) const {
auto const BBEnd = R.Begin->getParent()->end();
- GCNUpwardRPTracker RPTracker(*LIS);
+ GCNUpwardRPTracker RPTracker(*LIS, MF.getRegInfo());
if (R.End != BBEnd) {
// R.End points to the boundary instruction but the
// schedule doesn't include it
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 571ca3475305c..5a980f3014654 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,8 +14,10 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -50,8 +52,32 @@ void GCNRegPressure::inc(unsigned Reg,
LaneBitmask PrevMask,
LaneBitmask NewMask,
const MachineRegisterInfo &MRI) {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ const TargetRegisterClass *RC;
+ if (Register(Reg).isVirtual()) {
+ RC = MRI.getRegClass(Reg);
+ } else {
+ if (!MRI.isAllocatable(Reg))
+ return;
+ RC = TRI->getMinimalPhysRegClass(Reg);
+ if (!RC)
+ return;
+ }
+
+ unsigned RegKind = getRegKind(RC, STI);
unsigned NewNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(NewMask);
unsigned PrevNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(PrevMask);
+ // If multiple bits are set in the input masks for physical SGPRs, the
+ // expected result does not match what getNumCoveredRegs returns. This is
+ // because it returns the number of vector lanes, not the number of 32-bit
+ // regs. Hence, cap to the register's actual size so e.g. a 32-bit SGPR counts
+ // as 1 and VCC (64-bit) counts as 2, not 32.
+ if (Register(Reg).isPhysical() && RegKind == SGPR) {
+ unsigned MaxCovered = divideCeil(TRI->getRegSizeInBits(*RC), 32);
+ NewNumCoveredRegs = std::min(NewNumCoveredRegs, MaxCovered);
+ PrevNumCoveredRegs = std::min(PrevNumCoveredRegs, MaxCovered);
+ }
if (NewNumCoveredRegs == PrevNumCoveredRegs)
return;
@@ -64,10 +90,6 @@ void GCNRegPressure::inc(unsigned Reg,
assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
"prev mask should always be lesser than new");
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
- unsigned RegKind = getRegKind(RC, STI);
if (TRI->getRegSizeInBits(*RC) != 32) {
// Reg is from a tuple register class.
if (PrevMask.none()) {
@@ -471,6 +493,77 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
}
+bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
+ const LiveRange *LR = LIS.getCachedRegUnit(Unit);
+ if (!LR)
+ return false;
+ return LR->liveAt(SI);
+}
+
+bool GCNRPTracker::allRegUnitsLive(Register Reg) const {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ return llvm::all_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any();
+ });
+}
+
+bool GCNRPTracker::checkRegKilled(Register Reg, SlotIndex SI) const {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ return llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any() &&
+ !isUnitLiveAt(Unit, SI);
+ });
+}
+
+bool GCNRPTracker::eraseKilledUnits(Register Reg, SlotIndex SI) {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ bool IsKilled = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.any()) {
+ if (!isUnitLiveAt(Unit, SI)) {
+ IsKilled = true;
+ PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ }
+ return IsKilled;
+}
+
+bool GCNRPTracker::eraseAllLiveUnits(Register Reg) {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ bool WasLive = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.any()) {
+ WasLive = true;
+ PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ return WasLive;
+}
+
+bool GCNRPTracker::insertAllNotLiveUnits(Register Reg) {
+ assert(MRI && "MRI not initialized");
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ bool WasNotLive = false;
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.none()) {
+ WasNotLive = true;
+ PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+ }
+ return WasNotLive;
+}
+
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
const MachineRegisterInfo &MRI,
LaneBitmask LaneMaskFilter) {
@@ -520,6 +613,15 @@ void GCNRPTracker::reset(const MachineInstr &MI,
}
MaxVirtPressure = CurVirtPressure = getRegPressure(*MRI, VirtLiveRegs);
+
+ setPhysRegTracking();
+ // Clear physical register tracking (only if enabled)
+ if (TrackPhysRegs) {
+ PhysLiveRegs.clear();
+ PhysLiveRegs.init(*MRI);
+ MaxPhysPressure.clear();
+ CurPhysPressure.clear();
+ }
}
void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
@@ -528,6 +630,15 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
VirtLiveRegs = VirtLiveRegsSet;
LastTrackedMI = nullptr;
MaxVirtPressure = CurVirtPressure = getRegPressure(MRInfo, VirtLiveRegsSet);
+
+ setPhysRegTracking();
+ // Clear physical register tracking (only if enabled)
+ if (TrackPhysRegs) {
+ PhysLiveRegs.clear();
+ PhysLiveRegs.init(*MRI);
+ MaxPhysPressure.clear();
+ CurPhysPressure.clear();
+ }
}
/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
@@ -601,6 +712,45 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
? max(CurVirtPressure + ECDefPressure, MaxVirtPressure)
: max(CurVirtPressure, MaxVirtPressure);
+ // Track physical register defs and uses (only if enabled).
+ if (TrackPhysRegs) {
+ // Kill physical register defs (moving backward in upward tracking).
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!MRI->isAllocatable(Reg))
+ continue;
+
+ // Erase every live unit of this register, noting whether any was live.
+ bool WasLive = eraseAllLiveUnits(Reg);
+
+ // Update pressure once per register if it was live.
+ if (WasLive)
+ CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+
+ // Make physical register uses alive (moving backward in upward tracking).
+ for (const MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!MRI->isAllocatable(Reg))
+ continue;
+ // Insert every not-yet-live unit of this register, noting if any was added.
+ bool WasNotLive = insertAllNotLiveUnits(Reg);
+
+ // Update pressure once per register if it wasn't live before.
+ if (WasNotLive) {
+ CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
+ }
+
assert(CurVirtPressure == getRegPressure(*MRI, VirtLiveRegs));
}
@@ -680,7 +830,29 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
}
}
+ // Track physical register kills (only if enabled).
+ if (TrackPhysRegs) {
+ // Iterate over actual instruction operands to track which registers die.
+ SmallSet<Register, 8> SeenRegs;
+ for (const auto &MO : CurrMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!MRI->isAllocatable(Reg) || !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is killed and erase killed units.
+ bool IsKilled = eraseKilledUnits(Reg, SI);
+
+ // Update pressure once per register if it was live and is now killed.
+ if (IsKilled)
+ CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+ }
+
MaxVirtPressure = max(MaxVirtPressure, CurVirtPressure);
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
LastTrackedMI = nullptr;
@@ -698,7 +870,7 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
const MachineInstr *CurrMI = LastTrackedMI;
- // Add new registers or mask bits.
+ // Add new registers or mask bits (virtual registers).
for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
@@ -709,7 +881,33 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
CurVirtPressure.inc(Reg, PrevMask, LiveMask, *MRI);
}
+ // Add new physical register defs (only if enabled).
+ if (TrackPhysRegs) {
+ for (const auto &MO : CurrMI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg))
+ continue;
+
+ // Check if any unit of this register was not live before.
+ bool WasNotLive = false;
+ for (MCRegUnit Unit : MRI->getTargetRegisterInfo()->regunits(Reg)) {
+ VirtRegOrUnit VRU(Unit);
+ LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
+ if (PrevMask.none())
+ WasNotLive = true;
+ // Mark unit as live
+ PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
+ }
+
+ // Update pressure once per register if it wasn't live before.
+ if (WasNotLive)
+ CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
MaxVirtPressure = max(MaxVirtPressure, CurVirtPressure);
+ MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
}
bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator) {
@@ -775,8 +973,10 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
RegisterOperands RegOpers;
RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);
- GCNRegPressure TempPressure = CurVirtPressure;
+ GCNRegPressure TempVirtPressure = CurVirtPressure;
+ GCNRegPressure TempPhysPressure = CurPhysPressure;
+ // Process virtual register uses
for (const VRegMaskOrUnit &Use : RegOpers.Uses) {
if (!Use.VRegOrUnit.isVirtualReg())
continue;
@@ -808,10 +1008,10 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
LaneBitmask LiveMask =
It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
LaneBitmask NewMask = LiveMask & ~LastUseMask;
- TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ TempVirtPressure.inc(Reg, LiveMask, NewMask, *MRI);
}
- // Generate liveness for defs.
+ // Generate liveness for virtual register defs.
for (const VRegMaskOrUnit &Def : RegOpers.Defs) {
if (!Def.VRegOrUnit.isVirtualReg())
continue;
@@ -820,10 +1020,51 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
LaneBitmask LiveMask =
It != VirtLiveRegs.end() ? It->second : LaneBitmask(0);
LaneBitmask NewMask = LiveMask | Def.LaneMask;
- TempPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ TempVirtPressure.inc(Reg, LiveMask, NewMask, *MRI);
+ }
+
+ // Process physical registers (only if enabled).
+ if (TrackPhysRegs) {
+ SmallSet<Register, 8> SeenRegs;
+
+ // Process physical register defs.
+ for (const auto &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
+ !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is not currently live.
+ bool WasNotLive = !allRegUnitsLive(Reg);
+
+ if (WasNotLive && !MO.isDead()) {
+ TempPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
+ *MRI);
+ }
+ }
+
+ // Process physical register uses to find kills.
+ SeenRegs.clear();
+ for (const auto &MO : MI->uses()) {
+ if (!MO.isReg() || !MO.getReg().isPhysical())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical() || !MRI->isAllocatable(Reg) ||
+ !SeenRegs.insert(Reg).second)
+ continue;
+
+ // Check if any unit of this register is killed.
+ bool IsKilled = checkRegKilled(Reg, SlotIdx);
+
+ if (IsKilled) {
+ TempPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
+ *MRI);
+ }
+ }
}
- return TempPressure;
+ // Return sum of virtual and physical pressure
+ return TempVirtPressure + TempPhysPressure;
}
bool GCNUpwardRPTracker::isValid() const {
@@ -951,7 +1192,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
LiveIn = LiveOut = getLiveRegs(MBBStartSlot, LIS, MRI);
RPAtMBBEnd = getRegPressure(MRI, LiveIn);
} else {
- GCNDownwardRPTracker RPT(LIS);
+ GCNDownwardRPTracker RPT(LIS, MRI);
RPT.reset(MBB.front());
LiveIn = RPT.getLiveRegs();
@@ -966,7 +1207,7 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
RPAtMBBEnd = RPT.getPressure();
}
} else {
- GCNUpwardRPTracker RPT(LIS);
+ GCNUpwardRPTracker RPT(LIS, MRI);
RPT.reset(MRI, MBBLastSlot);
LiveOut = RPT.getLiveRegs();
@@ -1036,7 +1277,7 @@ LLVM_DUMP_METHOD void llvm::dumpMaxRegPressure(MachineFunction &MF,
unsigned MaxNumRegs = 0;
const MachineInstr *MaxVirtPressureMI = nullptr;
- GCNUpwardRPTracker RPT(LIS);
+ GCNUpwardRPTracker RPT(LIS, MRI);
for (const MachineBasicBlock &MBB : MF) {
RPT.reset(MRI, LIS.getSlotIndexes()->getMBBEndIdx(&MBB).getPrevSlot());
for (const MachineInstr &MI : reverse(MBB)) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 4dba6a4e2d71d..7ef704652b332 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -320,12 +320,47 @@ class GCNRPTracker {
protected:
const LiveIntervals &LIS;
+
+ // Virtual register tracking
LiveRegSet VirtLiveRegs;
GCNRegPressure CurVirtPressure, MaxVirtPressure;
+
+ // Physical register tracking: Maintain clean separation between virtual and
+ // physical registers. Tracking physical registers can be turned OFF with an
+ // option. Using llvm::LiveRegSet for consistency with the generic tracker.
+ llvm::LiveRegSet PhysLiveRegs;
+ GCNRegPressure CurPhysPressure, MaxPhysPressure;
+
+ // Flag to control whether physical register tracking is active. Set to true
+ // only when both GCNTrackers and TrackPhysRegInTrackers are enabled.
+ bool TrackPhysRegs = false;
+
const MachineInstr *LastTrackedMI = nullptr;
mutable const MachineRegisterInfo *MRI = nullptr;
- GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
+ GCNRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
+ : LIS(LIS), MRI(&MRI) {
+ setPhysRegTracking();
+ if (TrackPhysRegs)
+ PhysLiveRegs.init(MRI);
+ }
+
+ // Copy constructor - PhysLiveRegs must be initialized then copied.
+ GCNRPTracker(const GCNRPTracker &Other)
+ : LIS(Other.LIS), VirtLiveRegs(Other.VirtLiveRegs),
+ CurVirtPressure(Other.CurVirtPressure),
+ MaxVirtPressure(Other.MaxVirtPressure),
+ CurPhysPressure(Other.CurPhysPressure),
+ MaxPhysPressure(Other.MaxPhysPressure),
+ TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
+ MRI(Other.MRI) {
+ // Initialize PhysLiveRegs with proper universe, then copy contents.
+ if (MRI) {
+ PhysLiveRegs.init(*MRI);
+ PhysLiveRegs =
+ Other.PhysLiveRegs; // Use assignment operator to copy live regs.
+ }
+ }
void reset(const MachineInstr &MI, const LiveRegSet *VirtLiveRegsCopy,
bool After);
@@ -335,17 +370,60 @@ class GCNRPTracker {
LaneBitmask getLastUsedLanes(Register Reg, SlotIndex Pos) const;
+ // Helper to check if a register unit is live at a given slot index.
+ bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
+
+ // Check if all register units of Reg are currently live in PhysLiveRegs.
+ bool allRegUnitsLive(Register Reg) const;
+
+ // Check if any unit of Reg is tracked as live but is no longer live at SI.
+ bool checkRegKilled(Register Reg, SlotIndex SI) const;
+
+ // Check if Reg has any killed units and erase them from PhysLiveRegs.
+ bool eraseKilledUnits(Register Reg, SlotIndex SI);
+
+ // Erase all live units of Reg from PhysLiveRegs.
+ // Returns true if any unit was live (and thus erased).
+ bool eraseAllLiveUnits(Register Reg);
+
+ // Insert all not-live units of Reg into PhysLiveRegs.
+ // Returns true if any unit was not live (and thus inserted).
+ bool insertAllNotLiveUnits(Register Reg);
+
public:
+ // Enable physical register tracking only if both GCNTrackers and
+ // TrackPhysRegInTrackers are true.
+ void setPhysRegTracking();
+
// reset tracker and set live register set to the specified value.
void reset(const MachineRegisterInfo &MRInfo,
const LiveRegSet &VirtLiveRegsSet);
+
// live regs for the current state
const decltype(VirtLiveRegs) &getLiveRegs() const { return VirtLiveRegs; }
+ const decltype(VirtLiveRegs) &getVirtLiveRegs() const { return VirtLiveRegs; }
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
- void clearMaxPressure() { MaxVirtPressure.clear(); }
+ void clearMaxPressure() {
+ MaxVirtPressure.clear();
+ MaxPhysPressure.clear();
+ }
+
+ // Returns sum of virtual and physical register pressure
+ GCNRegPressure getPressure() const {
+ return CurVirtPressure + CurPhysPressure;
+ }
+
+ // Returns only virtual register pressure
+ GCNRegPressure getVirtPressure() const { return CurVirtPressure; }
+
+ // Returns only physical register pressure
+ GCNRegPressure getPhysPressure() const { return CurPhysPressure; }
- GCNRegPressure getPressure() const { return CurVirtPressure; }
+ // Returns sum of virtual and physical max pressure
+ GCNRegPressure getMaxPressure() const {
+ return MaxVirtPressure + MaxPhysPressure;
+ }
decltype(VirtLiveRegs) moveLiveRegs() { return std::move(VirtLiveRegs); }
};
@@ -360,7 +438,8 @@ getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
class GCNUpwardRPTracker : public GCNRPTracker {
public:
- GCNUpwardRPTracker(const LiveIntervals &LIS) : GCNRPTracker(LIS) {}
+ GCNUpwardRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
+ : GCNRPTracker(LIS, MRI) {}
using GCNRPTracker::reset;
@@ -389,12 +468,13 @@ class GCNUpwardRPTracker : public GCNRPTracker {
/// to reported by LIS.
bool isValid() const;
- const GCNRegPressure &getMaxPressure() const { return MaxVirtPressure; }
-
- void resetMaxPressure() { MaxVirtPressure = CurVirtPressure; }
+ void resetMaxPressure() {
+ MaxVirtPressure = CurVirtPressure;
+ MaxPhysPressure = CurPhysPressure;
+ }
GCNRegPressure getMaxPressureAndReset() {
- GCNRegPressure RP = MaxVirtPressure;
+ GCNRegPressure RP = getMaxPressure();
resetMaxPressure();
return RP;
}
@@ -410,7 +490,8 @@ class GCNDownwardRPTracker : public GCNRPTracker {
MachineBasicBlock::const_iterator MBBEnd;
public:
- GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {}
+ GCNDownwardRPTracker(const LiveIntervals &LIS, const MachineRegisterInfo &MRI)
+ : GCNRPTracker(LIS, MRI) {}
using GCNRPTracker::reset;
@@ -418,8 +499,9 @@ class GCNDownwardRPTracker : public GCNRPTracker {
/// \p return MaxPressure and clear it.
GCNRegPressure moveMaxPressure() {
- auto Res = MaxVirtPressure;
+ auto Res = getMaxPressure();
MaxVirtPressure.clear();
+ MaxPhysPressure.clear();
return Res;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 8b548c609e759..6685df3de7d22 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -78,6 +78,12 @@ static cl::opt<bool> GCNTrackers(
cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
cl::init(false));
+static cl::opt<bool> TrackPhysRegInTrackers(
+ "amdgpu-trackers-physical-register-tracking", cl::Hidden,
+ cl::desc("When using GCN trackers, count physical registers (e.g. from "
+ "inline asm) in pressure."),
+ cl::init(true));
+
static cl::opt<unsigned> PendingQueueLimit(
"amdgpu-scheduler-pending-queue-limit", cl::Hidden,
cl::desc(
@@ -107,14 +113,13 @@ const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
: GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
- DownwardTracker(*C->LIS), UpwardTracker(*C->LIS), HasHighPressure(false) {
-}
+ DownwardTracker(*C->LIS, C->MF->getRegInfo()),
+ UpwardTracker(*C->LIS, C->MF->getRegInfo()), HasHighPressure(false) {}
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
MF = &DAG->MF;
-
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
SGPRExcessLimit =
@@ -164,6 +169,14 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
<< ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}
+void GCNRPTracker::setPhysRegTracking() {
+ if (!GCNTrackers || !TrackPhysRegInTrackers) {
+ TrackPhysRegs = false;
+ return;
+ }
+ TrackPhysRegs = true;
+}
+
/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
///
@@ -988,7 +1001,7 @@ GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
if (Regions[RegionIdx].first == Regions[RegionIdx].second)
return llvm::getRegPressure(MRI, LiveIns[RegionIdx]);
- GCNDownwardRPTracker RPTracker(*LIS);
+ GCNDownwardRPTracker RPTracker(*LIS, MF.getRegInfo());
RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
&LiveIns[RegionIdx]);
return RPTracker.moveMaxPressure();
@@ -1002,7 +1015,7 @@ static MachineInstr *getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
const MachineBasicBlock *MBB) {
- GCNDownwardRPTracker RPTracker(*LIS);
+ GCNDownwardRPTracker RPTracker(*LIS, MF.getRegInfo());
// If the block has the only successor then live-ins of that successor are
// live-outs of the current block. We can reuse calculated live set if the
@@ -1135,7 +1148,6 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
void GCNScheduleDAGMILive::runSchedStages() {
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
-
if (!Regions.empty()) {
BBLiveInMap = getRegionLiveInMap();
if (GCNTrackers)
@@ -3114,10 +3126,10 @@ void PreRARematStage::finalizeGCNSchedStage() {
}
// Revert re-scheduling in all affected regions.
- for (const auto &[RegionIdx, OrigMIOrder, MaxVirtPressure] : RegionReverts) {
+ for (const auto &[RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
REMAT_DEBUG(dbgs() << "Reverting re-scheduling in region " << RegionIdx
<< '\n');
- DAG.Pressure[RegionIdx] = MaxVirtPressure;
+ DAG.Pressure[RegionIdx] = MaxPressure;
modifyRegionSchedule(RegionIdx, RegionBB[RegionIdx], OrigMIOrder);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index b28cdcffa5d64..99fd55db33285 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -703,12 +703,12 @@ class PreRARematStage : public GCNSchedStage {
/// Original instruction order (both debug and non-debug MIs).
std::vector<MachineInstr *> OrigMIOrder;
/// Maximum pressure recorded in the region.
- GCNRegPressure MaxVirtPressure;
+ GCNRegPressure MaxPressure;
RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
- const GCNRegPressure &MaxVirtPressure)
+ const GCNRegPressure &MaxPressure)
: RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
- MaxVirtPressure(MaxVirtPressure) {}
+ MaxPressure(MaxPressure) {}
};
/// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
/// regions.
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index 982034189892c..98c6d59054331 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -275,7 +275,7 @@ bool SIFormMemoryClausesImpl::run(MachineFunction &MF) {
"amdgpu-max-memory-clause", MaxClause);
for (MachineBasicBlock &MBB : MF) {
- GCNDownwardRPTracker RPT(*LIS);
+ GCNDownwardRPTracker RPT(*LIS, *MRI);
MachineBasicBlock::instr_iterator Next;
for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
MachineInstr &MI = *I;
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 6dcd78ee52f5e..059d930dc8e45 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -11164,73 +11164,73 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: dead [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: dead [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_GT_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[COPY1]](s32), implicit $exec
; GFX908-GCNTRACKERS-NEXT: dead undef [[S_MOV_B32_:%[0-9]+]].sub0:sreg_64 = COPY [[S_LOAD_DWORDX2_IMM]].sub1
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF17]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_3:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF1]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_4:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF2]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_5:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF3]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_6:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF4]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_7:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF5]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_8:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF6]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_9:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF7]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_10:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF8]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_11:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF9]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_12:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF10]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_13:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF11]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_14:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF12]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_15:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF13]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_16:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF14]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_17:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF15]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_18:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF16]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_19:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF18]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_20:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF19]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF20]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF21]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_21:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF22]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_22:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF23]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_23:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF24]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_24:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF25]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_25:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF26]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_26:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF27]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr8, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 $vgpr9, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_31]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_5]], implicit [[V_CVT_I32_F32_e32_13]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_6]], implicit [[V_CVT_I32_F32_e32_14]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_7]], implicit [[V_CVT_I32_F32_e32_15]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_8]], implicit [[V_CVT_I32_F32_e32_16]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_17]], implicit [[V_CVT_I32_F32_e32_18]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_9]], implicit [[V_CVT_I32_F32_e32_17]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_10]], implicit [[V_CVT_I32_F32_e32_18]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_19]], implicit [[V_CVT_I32_F32_e32_20]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_21]], implicit [[V_CVT_I32_F32_e32_22]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_23]], implicit [[V_CVT_I32_F32_e32_24]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_25]], implicit [[V_CVT_I32_F32_e32_26]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_27]], implicit [[V_CVT_I32_F32_e32_28]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]]
; GFX908-GCNTRACKERS-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
@@ -11508,18 +11508,18 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-GCNTRACKERS-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF29]]
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_28]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 481eb1bc3d91a..810f478b3f12a 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -7,6 +7,14 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX7-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX8-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefixes=GFX900-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefixes=GFX942-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX10_1-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX10_3-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX11-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck -check-prefix=GFX12-GCNTRACKERS %s
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
<16 x i32>, <7 x i32>, ; vgprs
@@ -563,6 +571,540 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX7-GCNTRACKERS-NEXT: v_add_i32_e32 v0, vcc, 64, v0
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX7-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, 0x4040
+; GFX7-GCNTRACKERS-NEXT: v_mad_u32_u24 v0, v0, 64, s32
+; GFX7-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v0, 6, v0
+; GFX7-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX8-GCNTRACKERS-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX8-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, 0x4040
+; GFX8-GCNTRACKERS-NEXT: v_mad_u32_u24 v0, v0, 64, s32
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v0, 6, v0
+; GFX8-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v0, off, s[0:3], s32
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 0x4040, v0
+; GFX900-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v0
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v0, off, s[0:3], s32
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x101100
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4044
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
+; GFX10_1-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v24
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v24, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24
+; GFX10_3-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v24
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80880
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 0x4040
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4044
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX12-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: s_add_co_ci_u32 s59, s32, 0x4000
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX12-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca i32, align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
@@ -1084,6 +1626,485 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_mad_u32_u24 v22, 16, 64, s32
+; GFX7-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v22, 6, v22
+; GFX7-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_mad_u32_u24 v22, 16, 64, s32
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e32 v22, 6, v22
+; GFX8-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 6, s32
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v22, 16, v22
+; GFX900-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x100400
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4010
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 16
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x4010
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v22, 16, v22
+; GFX10_1-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v22, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v22, 16, v22
+; GFX10_3-GCNTRACKERS-NEXT: v_readfirstlane_b32 s54, v22
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x80200
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4010
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_addc_u32 s59, s32, 16
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-GCNTRACKERS-NEXT: s_bitcmp1_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_bitset0_b32 s59, 0
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s59
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x4010
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s30, 0
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s59, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s32
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v21, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v21, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v21, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v21, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v21, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v21, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v21, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v21, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v21, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v21, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v21, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v21, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v21, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v21, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v21, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v21, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v21, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 16, addrspace(5)
; Force no SGPRs to be available for the carry-out of the vector add.
@@ -1660,6 +2681,543 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX7-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX7-GCNTRACKERS: ; %bb.0:
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX7-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s28, 17
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s29, 18
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX7-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX7-GCNTRACKERS-NEXT: v_lshr_b32_e64 v0, s32, 6
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX7-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX7-GCNTRACKERS-NEXT: v_add_i32_e32 v0, vcc, 64, v0
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX7-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX7-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX7-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX7-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s28, v23, 17
+; GFX7-GCNTRACKERS-NEXT: v_readlane_b32 s29, v23, 18
+; GFX7-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX7-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX7-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX7-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX7-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX8-GCNTRACKERS: ; %bb.0:
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX8-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX8-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX8-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX8-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX8-GCNTRACKERS-NEXT: v_add_u32_e32 v0, vcc, 64, v0
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX8-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX8-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX8-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX8-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX8-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX8-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX8-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX8-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX8-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX8-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX900-GCNTRACKERS: ; %bb.0:
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX900-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s30, 0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s31, 1
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s33, 2
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s34, 3
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s35, 4
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s36, 5
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s37, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s38, 7
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s39, 8
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s48, 9
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s49, 10
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s50, 11
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s51, 12
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s52, 13
+; GFX900-GCNTRACKERS-NEXT: s_lshr_b32 s5, s32, 6
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s53, 14
+; GFX900-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 6, s32
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s4, s5, 0x4240
+; GFX900-GCNTRACKERS-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
+; GFX900-GCNTRACKERS-NEXT: v_add_u32_e32 v0, 64, v0
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v22, s4, 0
+; GFX900-GCNTRACKERS-NEXT: s_and_b64 s[4:5], 0, exec
+; GFX900-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 0
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX900-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX900-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s55, v23, 16
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s54, v23, 15
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s53, v23, 14
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s52, v23, 13
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s51, v23, 12
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s50, v23, 11
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s49, v23, 10
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s48, v23, 9
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s39, v23, 8
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s38, v23, 7
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s37, v23, 6
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s36, v23, 5
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s35, v23, 4
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s34, v23, 3
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s33, v23, 2
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s31, v23, 1
+; GFX900-GCNTRACKERS-NEXT: v_readlane_b32 s30, v23, 0
+; GFX900-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_add_i32 s6, s32, 0x201100
+; GFX900-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
+; GFX900-GCNTRACKERS-NEXT: s_mov_b64 exec, s[4:5]
+; GFX900-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX900-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX942-GCNTRACKERS: ; %bb.0:
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-GCNTRACKERS-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX942-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX942-GCNTRACKERS-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s2, s32, 0x8040
+; GFX942-GCNTRACKERS-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload
+; GFX942-GCNTRACKERS-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX942-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_1-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX10_1-GCNTRACKERS: ; %bb.0:
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX10_1-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_1-GCNTRACKERS-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s58, s4, 0x4240
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_1-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_1-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX10_1-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX10_1-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX10_1-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_1-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_1-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_1-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_1-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10_3-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX10_3-GCNTRACKERS: ; %bb.0:
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-GCNTRACKERS-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX10_3-GCNTRACKERS-NEXT: v_lshrrev_b32_e64 v0, 5, s32
+; GFX10_3-GCNTRACKERS-NEXT: s_lshr_b32 s4, s32, 5
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s58, s4, 0x4240
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_add_nc_u32_e32 v0, 64, v0
+; GFX10_3-GCNTRACKERS-NEXT: s_and_b32 s4, 0, exec_lo
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX10_3-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX10_3-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX10_3-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX10_3-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s4, -1
+; GFX10_3-GCNTRACKERS-NEXT: s_add_i32 s5, s32, 0x100800
+; GFX10_3-GCNTRACKERS-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload
+; GFX10_3-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s4
+; GFX10_3-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX10_3-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX11-GCNTRACKERS: ; %bb.0:
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-GCNTRACKERS-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX11-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX11-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX11-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX11-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX11-GCNTRACKERS-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX11-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX11-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX11-GCNTRACKERS-NEXT: s_add_i32 s1, s32, 0x8040
+; GFX11-GCNTRACKERS-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload
+; GFX11-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX11-GCNTRACKERS-NEXT: s_waitcnt vmcnt(0)
+; GFX11-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-GCNTRACKERS-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset:
+; GFX12-GCNTRACKERS: ; %bb.0:
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_expcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_samplecnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_bvhcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_wait_kmcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s30, 0
+; GFX12-GCNTRACKERS-NEXT: s_add_co_i32 s58, s32, 0x4200
+; GFX12-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s32
+; GFX12-GCNTRACKERS-NEXT: s_and_b32 s0, 0, exec_lo
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use alloca0 v0
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s31, 1
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s33, 2
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s34, 3
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s35, 4
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s36, 5
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s37, 6
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s38, 7
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s39, 8
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s48, 9
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s49, 10
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
+; GFX12-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMSTART
+; GFX12-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
+; GFX12-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s55, v22, 16
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s54, v22, 15
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s53, v22, 14
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s52, v22, 13
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s51, v22, 12
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s50, v22, 11
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s49, v22, 10
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s48, v22, 9
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s39, v22, 8
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s38, v22, 7
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s37, v22, 6
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s36, v22, 5
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s35, v22, 4
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s34, v22, 3
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s33, v22, 2
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s31, v22, 1
+; GFX12-GCNTRACKERS-NEXT: v_readlane_b32 s30, v22, 0
+; GFX12-GCNTRACKERS-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX12-GCNTRACKERS-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload
+; GFX12-GCNTRACKERS-NEXT: s_wait_alu depctr_sa_sdst(0)
+; GFX12-GCNTRACKERS-NEXT: s_mov_b32 exec_lo, s0
+; GFX12-GCNTRACKERS-NEXT: s_wait_loadcnt 0x0
+; GFX12-GCNTRACKERS-NEXT: s_setpc_b64 s[30:31]
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
%alloca1 = alloca [4096 x i32], align 4, addrspace(5)
call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
index f70cd6816a966..b2b73e9a96fcb 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll
@@ -1,5 +1,6 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 2>&1 < %s | FileCheck -check-prefixes=ERR-GCNTRACKERS %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack 2>&1 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 2>&1 < %s | FileCheck -check-prefixes=GCN-TRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack 2>&1 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 2>&1 < %s | FileCheck --check-prefix=GCN-NOPHYS-FAIL %s
%asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs
<16 x i32>, <7 x i32>, ; vgprs
@@ -16,10 +17,13 @@
i64 ; vcc
}
-; ERR-GCNTRACKERS: ran out of registers during register allocation
+; GCN-TRACKERS-NOT: ran out of registers during register allocation
; GCN-NOT: ran out of registers during register allocation
+; GCN-NOPHYS-FAIL: ran out of registers during register allocation
-; FIXME: GCN Trackers do not track pressure from PhysRegs, so scheduling is actually worse
+; GCN Trackers now track physical register pressure, so this test verifies that
+; both trackers handle heavy physical register usage from inline assembly, and
+; that allocation still fails when physical register tracking is disabled.
define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 {
%alloca0 = alloca [4096 x i32], align 64, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 0d25bc97ff775..0d81a11243ccf 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -1,17 +1,31 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 < %s | FileCheck --check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN-GCNTRACKERS %s
-
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED-GCNTRACKERS %s
+; REQUIRES: asserts
; CHECK-LABEL: {{^}}spill:
; GCN: NumSgprs: 104
; GCN-GCNTRACKERS: NumSgprs: 104
; GCN: NumVgprs: 1
-; GCN-GCNTRACKERS: NumVgprs: 2
+; GCN-GCNTRACKERS: NumVgprs: 1
; GCN: ScratchSize: 0
; GCN-GCNTRACKERS: ScratchSize: 0
; GCN: Occupancy: 5
; GCN-GCNTRACKERS: Occupancy: 5
-
-; FIXME: GCN Trackers do not track pressure from PhysRegs, so scheduling is actually worse
+;
+; Check scheduling pressure values:
+; SCHED-LABEL: spill:%bb.0 entry
+; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
+; SCHED: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 97
+;
+; SCHED-GCNTRACKERS-LABEL: spill:%bb.0 entry
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 99
+; SCHED-GCNTRACKERS: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 98
+;
+; NOTE: GCN Trackers now track pressure from both virtual and physical registers.
+; The GCN tracker now matches the generic tracker's VGPR count (1 VGPR). The SGPR
+; pressure is still one higher (99 vs 98 before, 98 vs 97 after scheduling) due to summing
+; physical register pressure from inline asm constraints with virtual register pressure.
define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 {
entry:
@@ -247,9 +261,15 @@ bb3:
; GCN: NumSgprs: 104
; GCN-GCNTRACKERS: NumSgprs: 104
; GCN: NumVgprs: 2
-; GCN-GCNTRACKERS: NumVgprs: 3
+; GCN-GCNTRACKERS: NumVgprs: 2
; GCN: ScratchSize: 8
-; GCN-GCNTRACKERS: ScratchSize: 12
+; GCN-GCNTRACKERS: ScratchSize: 8
+;
+; SCHED-LABEL: spill_func:%bb.0 entry
+; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 97
+;
+; SCHED-GCNTRACKERS-LABEL: spill_func:%bb.0 entry
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
define void @spill_func(ptr addrspace(1) %arg) #0 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
new file mode 100644
index 0000000000000..084acee121f78
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -0,0 +1,513 @@
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 < %s | FileCheck --check-prefix=GCN-NOPHYS %s
+; REQUIRES: asserts
+
+; Test that GCN trackers correctly track physical register pressure from inline asm
+
+; GCN-DEBUG-LABEL: test_single_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_single_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_single_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) { ; one inline asm output pinned to a fixed SGPR
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"() ; "={s10}" binds output $0 to s10; %val itself is never used
+ store i32 0, ptr addrspace(1) %out ; stores the constant 0, not the asm result
+ ret void
+}
+
+; Test multiple physical registers
+
+; GCN-DEBUG-LABEL: test_multiple_physregs
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_multiple_physregs
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_multiple_physregs
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) { ; two inline asm outputs pinned to fixed SGPRs
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"() ; $0 -> s10, $1 -> s11; %result is never used
+ store i32 0, ptr addrspace(1) %out ; stores the constant 0, not the asm results
+ ret void
+}
+
+; Test physical register with virtual registers
+
+; GCN-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_physreg_with_vreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; fixed-SGPR asm output next to an ordinary load/store
+entry:
+ %asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"() ; "={s10}" binds output $0 to s10; %asm_val is never used
+ %val = load i32, ptr addrspace(1) %in ; value copied from %in ...
+ store i32 %val, ptr addrspace(1) %out ; ... to %out, independent of the asm result
+ ret void
+}
+
+; Test that we don't inflate pressure for an empty kernel (checked with and without GCN trackers)
+
+; GCN-DEBUG-LABEL: test_no_inflation
+
+; GENERIC-DEBUG-LABEL: test_no_inflation
+
+; GCN-NOPHYS-DEBUG-LABEL: test_no_inflation
+
+define amdgpu_kernel void @test_no_inflation() { ; empty kernel: no arguments, no inline asm
+entry:
+ ret void ; the body only returns
+}
+
+; Test early-clobber constraint
+
+; GCN-DEBUG-LABEL: test_early_clobber
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_early_clobber
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_early_clobber
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) { ; fixed-SGPR asm output using the early-clobber modifier
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"() ; "=&{s10}" is an early-clobber output bound to s10
+ store i32 %val, ptr addrspace(1) %out ; here the asm result is actually stored to %out
+ ret void
+}
+
+; Test physical register s10 referenced directly in the asm text (not as an operand), feeding output s11
+
+; GCN-DEBUG-LABEL: test_physreg_input
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_physreg_input
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_input
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) { ; asm text writes and reads s10 and defines s11
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"() ; only s11 is a declared operand; NOTE(review): s10 appears only in the asm text, with no input operand or clobber - confirm this is intended
+ store i32 0, ptr addrspace(1) %out ; stores the constant 0; %val is never used
+ ret void
+}
+
+; Test virtual and physical register overlap
+
+; GCN-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 18
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 18
+
+; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+
+; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_overlap
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+
+define amdgpu_kernel void @test_vreg_and_physreg_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) { ; two fixed-SGPR asm outputs plus a two-load add
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"() ; $0 -> s10, $1 -> s11; %result is never used
+ %val1 = load i32, ptr addrspace(1) %in1 ; first operand, loaded from %in1
+ %val2 = load i32, ptr addrspace(1) %in2 ; second operand, loaded from %in2
+ %sum = add i32 %val1, %val2 ; sum of the two loaded values
+ store i32 %sum, ptr addrspace(1) %out ; only the sum is stored; the asm results are not
+ ret void
+}
+
+; Verify assembly output for GCN trackers
+; GCN-LABEL: test_single_physreg:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_single_physreg.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_multiple_physregs:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_multiple_physregs.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_physreg_with_vreg:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_no_inflation:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_endpgm
+; GCN: .set test_no_inflation.numbered_sgpr, 0
+; GCN: TotalNumSgprs: 0
+; GCN: NumVgprs: 0
+
+; GCN-LABEL: test_early_clobber:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_early_clobber.numbered_sgpr, 11
+; GCN: TotalNumSgprs: 11
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_physreg_input:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_physreg_input.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NEXT: ; %bb.0:
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s6
+; GCN-NEXT: s_mov_b32 s3, s7
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NEXT: s_mov_b32 s10, s6
+; GCN-NEXT: s_mov_b32 s11, s7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT: s_endpgm
+; GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 14
+; GCN: NumVgprs: 2
+
+; Verify assembly output with GCN trackers but physical register tracking disabled (same as GCN)
+; GCN-NOPHYS-LABEL: test_single_physreg:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_single_physreg.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_multiple_physregs:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_multiple_physregs.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_physreg_with_vreg:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_physreg_with_vreg.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_no_inflation:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_no_inflation.numbered_sgpr, 0
+; GCN-NOPHYS: TotalNumSgprs: 0
+; GCN-NOPHYS: NumVgprs: 0
+
+; GCN-NOPHYS-LABEL: test_early_clobber:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_early_clobber.numbered_sgpr, 11
+; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_physreg_input:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_physreg_input.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NOPHYS-NEXT: ; %bb.0:
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: s_mov_b32 s0, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s1, s3
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s11, s7
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+; GCN-NOPHYS: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 14
+; GCN-NOPHYS: NumVgprs: 2
+
+; Verify assembly output without GCN trackers (should be identical)
+; NO-GCN-LABEL: test_single_physreg:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_single_physreg.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_multiple_physregs:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_multiple_physregs.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_physreg_with_vreg:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_mov_b32 s4, s2
+; NO-GCN-NEXT: s_mov_b32 s5, s3
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_no_inflation:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_no_inflation.numbered_sgpr, 0
+; NO-GCN: TotalNumSgprs: 0
+; NO-GCN: NumVgprs: 0
+
+; NO-GCN-LABEL: test_early_clobber:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_early_clobber.numbered_sgpr, 11
+; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_physreg_input:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_physreg_input.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_vreg_and_physreg_overlap:
+; NO-GCN-NEXT: ; %bb.0:
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: s_mov_b32 s0, s2
+; NO-GCN-NEXT: s_mov_b32 s1, s3
+; NO-GCN-NEXT: s_mov_b32 s2, s6
+; NO-GCN-NEXT: s_mov_b32 s3, s7
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; NO-GCN-NEXT: s_mov_b32 s10, s6
+; NO-GCN-NEXT: s_mov_b32 s11, s7
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; NO-GCN-NEXT: s_endpgm
+; NO-GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 14
+; NO-GCN: NumVgprs: 2
>From 2894bb8df5498f7cf457df50874cfe13e76f3e21 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 3 Mar 2026 21:53:28 -0600
Subject: [PATCH 2/7] [AMDGPU] Used LiveRegUnits for physical register pressure
tracking.
Addressed review comments as well.
---
llvm/include/llvm/CodeGen/RegisterPressure.h | 14 -
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 215 ++++++------
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 30 +-
.../machine-scheduler-sink-trivial-remats.mir | 10 +-
.../AMDGPU/materialize-frame-index-sgpr.ll | 10 +-
.../AMDGPU/schedule-amdgpu-tracker-physreg.ll | 9 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 310 +++++++++++-------
7 files changed, 333 insertions(+), 265 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index 01a944f386014..7485be6dcb351 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -293,20 +293,6 @@ class LiveRegSet {
}
public:
- LiveRegSet() = default;
-
- // Copy assignment operator - copies live register contents.
- // Note: Both LiveRegSets must have been initialized with init() first.
- LiveRegSet &operator=(const LiveRegSet &Other) {
- if (this != &Other) {
- NumRegUnits = Other.NumRegUnits;
- Regs.clear();
- for (const IndexMaskPair &Pair : Other.Regs)
- Regs.insert(Pair);
- }
- return *this;
- }
-
LLVM_ABI void clear();
LLVM_ABI void init(const MachineRegisterInfo &MRI);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5a980f3014654..efd4599c89509 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -14,7 +14,9 @@
#include "GCNRegPressure.h"
#include "AMDGPU.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/Support/MathExtras.h"
@@ -48,36 +50,10 @@ unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
: (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
}
-void GCNRegPressure::inc(unsigned Reg,
- LaneBitmask PrevMask,
- LaneBitmask NewMask,
- const MachineRegisterInfo &MRI) {
- const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
- const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
- const TargetRegisterClass *RC;
- if (Register(Reg).isVirtual()) {
- RC = MRI.getRegClass(Reg);
- } else {
- if (!MRI.isAllocatable(Reg))
- return;
- RC = TRI->getMinimalPhysRegClass(Reg);
- if (!RC)
- return;
- }
-
- unsigned RegKind = getRegKind(RC, STI);
+void GCNRegPressure::inc(unsigned Reg, LaneBitmask PrevMask,
+ LaneBitmask NewMask, const MachineRegisterInfo &MRI) {
unsigned NewNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(NewMask);
unsigned PrevNumCoveredRegs = SIRegisterInfo::getNumCoveredRegs(PrevMask);
- // If multiple bits are set in the input masks for physical SGPRs, the
- // expected result does not match what getNumCoveredRegs returns. This is
- // because it returns the number of vector lanes, not the number of 32-bit
- // regs. Hence, cap to the register's actual size so e.g. a 32-bit SGPR counts
- // as 1 and VCC (64-bit) counts as 2, not 32.
- if (Register(Reg).isPhysical() && RegKind == SGPR) {
- unsigned MaxCovered = divideCeil(TRI->getRegSizeInBits(*RC), 32);
- NewNumCoveredRegs = std::min(NewNumCoveredRegs, MaxCovered);
- PrevNumCoveredRegs = std::min(PrevNumCoveredRegs, MaxCovered);
- }
if (NewNumCoveredRegs == PrevNumCoveredRegs)
return;
@@ -90,6 +66,10 @@ void GCNRegPressure::inc(unsigned Reg,
assert(PrevMask < NewMask && PrevNumCoveredRegs < NewNumCoveredRegs &&
"prev mask should always be lesser than new");
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ unsigned RegKind = getRegKind(RC, STI);
if (TRI->getRegSizeInBits(*RC) != 32) {
// Reg is from a tuple register class.
if (PrevMask.none()) {
@@ -119,6 +99,28 @@ void GCNRegPressure::inc(unsigned Reg,
Value[RegKind] += Sign;
}
+void GCNRegPressure::inc(MCRegister Reg, bool IsAdd,
+ const MachineRegisterInfo &MRI) {
+ if (!MRI.isAllocatable(Reg))
+ return;
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ if (!RC)
+ return;
+ const SIRegisterInfo *STI = static_cast<const SIRegisterInfo *>(TRI);
+ unsigned RegKind = getRegKind(RC, STI);
+ unsigned NumRegs = divideCeil(TRI->getRegSizeInBits(*RC), 32);
+ int Sign = IsAdd ? 1 : -1;
+
+ if (TRI->getRegSizeInBits(*RC) != 32) {
+ unsigned TupleIdx = TOTAL_KINDS + RegKind;
+ Value[TupleIdx] += Sign * TRI->getRegClassWeight(RC).RegWeight;
+ Value[RegKind] += Sign * static_cast<int>(NumRegs);
+ } else {
+ Value[RegKind] += Sign;
+ }
+}
+
namespace {
struct RegExcess {
unsigned SGPR = 0;
@@ -495,72 +497,64 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
bool GCNRPTracker::isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const {
const LiveRange *LR = LIS.getCachedRegUnit(Unit);
- if (!LR)
- return false;
- return LR->liveAt(SI);
+ // If LIS has no reg-unit live range, be conservative and assume it is live.
+ return !LR || LR->liveAt(SI);
}
-bool GCNRPTracker::allRegUnitsLive(Register Reg) const {
+bool GCNRPTracker::allRegUnitsLive(MCRegister Reg) const {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const BitVector &Units = PhysLiveRegs.getBitVector();
return llvm::all_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
- return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any();
+ return Units.test(static_cast<unsigned>(Unit));
});
}
-bool GCNRPTracker::checkRegKilled(Register Reg, SlotIndex SI) const {
+bool GCNRPTracker::checkRegKilled(MCRegister Reg, SlotIndex SI) const {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ const BitVector &Units = PhysLiveRegs.getBitVector();
return llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
- return PhysLiveRegs.contains(VirtRegOrUnit(Unit)).any() &&
- !isUnitLiveAt(Unit, SI);
+ return Units.test(static_cast<unsigned>(Unit)) && !isUnitLiveAt(Unit, SI);
});
}
-bool GCNRPTracker::eraseKilledUnits(Register Reg, SlotIndex SI) {
+bool GCNRPTracker::eraseKilledUnits(MCRegister Reg, SlotIndex SI) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- bool IsKilled = false;
+ BitVector KilledUnits(PhysLiveRegs.getBitVector().size(), false);
for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.any()) {
- if (!isUnitLiveAt(Unit, SI)) {
- IsKilled = true;
- PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ unsigned U = static_cast<unsigned>(Unit);
+ if (PhysLiveRegs.getBitVector().test(U) && !isUnitLiveAt(Unit, SI))
+ KilledUnits.set(U);
}
- return IsKilled;
+ if (KilledUnits.none())
+ return false;
+ PhysLiveRegs.removeUnits(KilledUnits);
+ return true;
}
-bool GCNRPTracker::eraseAllLiveUnits(Register Reg) {
+bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- bool WasLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.any()) {
- WasLive = true;
- PhysLiveRegs.erase(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ const BitVector &Units = PhysLiveRegs.getBitVector();
+ bool WasLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return Units.test(static_cast<unsigned>(Unit));
+ });
+ if (WasLive)
+ PhysLiveRegs.removeReg(Reg);
return WasLive;
}
-bool GCNRPTracker::insertAllNotLiveUnits(Register Reg) {
+bool GCNRPTracker::insertAllNotLiveUnits(MCRegister Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
- bool WasNotLive = false;
- for (MCRegUnit Unit : TRI->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.none()) {
- WasNotLive = true;
- PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
- }
+ const BitVector &Units = PhysLiveRegs.getBitVector();
+ bool WasNotLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ return !Units.test(static_cast<unsigned>(Unit));
+ });
+ if (WasNotLive)
+ PhysLiveRegs.addReg(Reg);
return WasNotLive;
}
@@ -618,7 +612,7 @@ void GCNRPTracker::reset(const MachineInstr &MI,
// Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
- PhysLiveRegs.init(*MRI);
+ PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
MaxPhysPressure.clear();
CurPhysPressure.clear();
}
@@ -635,7 +629,7 @@ void GCNRPTracker::reset(const MachineRegisterInfo &MRInfo,
// Clear physical register tracking (only if enabled)
if (TrackPhysRegs) {
PhysLiveRegs.clear();
- PhysLiveRegs.init(*MRI);
+ PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
MaxPhysPressure.clear();
CurPhysPressure.clear();
}
@@ -722,30 +716,31 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (!MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was live before and erase them.
- bool WasLive = eraseAllLiveUnits(Reg);
+ // Check if any unit of this register was live before and if so,
+ // erase all of the regunits from PhysLiveRegs.
+ bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
- // Update pressure once per register if it was live.
+ // Update pressure once per register if any unit of this register was live
+ // before.
if (WasLive)
- CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
- *MRI);
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
// Make physical register uses alive (moving backward in upward tracking).
- for (const MachineOperand &MO : MI.uses()) {
+ for (const MachineOperand &MO : MI.all_uses()) {
if (!MO.isReg() || !MO.getReg().isPhysical() || !MO.readsReg())
continue;
Register Reg = MO.getReg();
if (!MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was not live before and insert them.
- bool WasNotLive = insertAllNotLiveUnits(Reg);
+ // Check if any unit of this register was not live before and if so,
+ // insert all of the regunits into PhysLiveRegs.
+ bool WasNotLive = insertAllNotLiveUnits(Reg.asMCReg());
- // Update pressure once per register if it wasn't live before.
- if (WasNotLive) {
- CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
- *MRI);
- }
+ // Update pressure once per register if any unit of this register was not
+ // live before.
+ if (WasNotLive)
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
@@ -832,7 +827,8 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
// Track physical register kills (only if enabled).
if (TrackPhysRegs) {
- // Iterate over actual instruction operands to track which registers die.
+ // Iterate over actual instruction operands to track which regunits are
+ // killed.
SmallSet<Register, 8> SeenRegs;
for (const auto &MO : CurrMI->operands()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
@@ -842,12 +838,12 @@ bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
continue;
// Check if any unit of this register is killed and erase killed units.
- bool IsKilled = eraseKilledUnits(Reg, SI);
+ bool IsKilled = eraseKilledUnits(Reg.asMCReg(), SI);
- // Update pressure once per register if it was live and is now killed.
+ // Update pressure once per register if any unit of this register is
+ // killed.
if (IsKilled)
- CurPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
- *MRI);
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
}
@@ -888,21 +884,16 @@ void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
if (!Reg.isPhysical() || !MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was not live before.
- bool WasNotLive = false;
- for (MCRegUnit Unit : MRI->getTargetRegisterInfo()->regunits(Reg)) {
- VirtRegOrUnit VRU(Unit);
- LaneBitmask PrevMask = PhysLiveRegs.contains(VRU);
- if (PrevMask.none())
- WasNotLive = true;
- // Mark unit as live
- PhysLiveRegs.insert(VRegMaskOrUnit(VRU, LaneBitmask::getAll()));
- }
+ // Check if any unit of this register was not live before and, if so
+ // (unless the def is dead), insert all its regunits into PhysLiveRegs.
+ bool WasNotLive = !allRegUnitsLive(Reg.asMCReg());
+ if (WasNotLive && !MO.isDead())
+ PhysLiveRegs.addReg(Reg);
- // Update pressure once per register if it wasn't live before.
- if (WasNotLive)
- CurPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
- *MRI);
+ // Update pressure once per register if any unit of this register was not
+ // live before.
+ if (WasNotLive && !MO.isDead())
+ CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
}
@@ -1035,17 +1026,17 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
continue;
// Check if any unit of this register is not currently live.
- bool WasNotLive = !allRegUnitsLive(Reg);
+ bool WasNotLive = !allRegUnitsLive(Reg.asMCReg());
- if (WasNotLive && !MO.isDead()) {
- TempPhysPressure.inc(Reg, LaneBitmask::getNone(), LaneBitmask::getAll(),
- *MRI);
- }
+ // Update pressure once per register if any unit of this register is not
+ // currently live.
+ if (WasNotLive && !MO.isDead())
+ TempPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
// Process physical register uses to find kills.
SeenRegs.clear();
- for (const auto &MO : MI->uses()) {
+ for (const auto &MO : MI->all_uses()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
Register Reg = MO.getReg();
@@ -1054,12 +1045,12 @@ GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
continue;
// Check if any unit of this register is killed.
- bool IsKilled = checkRegKilled(Reg, SlotIdx);
+ bool IsKilled = checkRegKilled(Reg.asMCReg(), SlotIdx);
- if (IsKilled) {
- TempPhysPressure.inc(Reg, LaneBitmask::getAll(), LaneBitmask::getNone(),
- *MRI);
- }
+ // Update pressure once per register if any unit of this register is
+ // killed.
+ if (IsKilled)
+ TempPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/false, *MRI);
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 7ef704652b332..04000e33aae28 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -19,6 +19,7 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include <algorithm>
#include <array>
@@ -130,6 +131,10 @@ struct GCNRegPressure {
LaneBitmask NewMask,
const MachineRegisterInfo &MRI);
+ /// Update pressure for a physical register (add or remove). Used when
+ /// tracking physical registers.
+ void inc(MCRegister Reg, bool IsAdd, const MachineRegisterInfo &MRI);
+
bool higherOccupancy(const GCNSubtarget &ST, const GCNRegPressure &O,
unsigned DynamicVGPRBlockSize) const {
return getOccupancy(ST, DynamicVGPRBlockSize) >
@@ -327,8 +332,8 @@ class GCNRPTracker {
// Physical register tracking: Maintain clean separation between virtual and
// physical registers. Tracking physical registers can be turned OFF with an
- // option. Using llvm::LiveRegSet for consistency with the generic tracker.
- llvm::LiveRegSet PhysLiveRegs;
+ // option. Uses LiveRegUnits (bit vector of live register units).
+ LiveRegUnits PhysLiveRegs;
GCNRegPressure CurPhysPressure, MaxPhysPressure;
// Flag to control whether physical register tracking is active.
@@ -342,7 +347,7 @@ class GCNRPTracker {
: LIS(LIS), MRI(&MRI) {
setPhysRegTracking();
if (TrackPhysRegs)
- PhysLiveRegs.init(MRI);
+ PhysLiveRegs.init(*MRI.getTargetRegisterInfo());
}
// Copy constructor - PhysLiveRegs must be initialized then copied.
@@ -354,11 +359,10 @@ class GCNRPTracker {
MaxPhysPressure(Other.MaxPhysPressure),
TrackPhysRegs(Other.TrackPhysRegs), LastTrackedMI(Other.LastTrackedMI),
MRI(Other.MRI) {
- // Initialize PhysLiveRegs with proper universe, then copy contents.
- if (MRI) {
- PhysLiveRegs.init(*MRI);
- PhysLiveRegs =
- Other.PhysLiveRegs; // Use assignment operator to copy live regs.
+ if (TrackPhysRegs) {
+ assert(MRI && "MRI not initialized");
+ PhysLiveRegs.init(*MRI->getTargetRegisterInfo());
+ PhysLiveRegs.addUnits(Other.PhysLiveRegs.getBitVector());
}
}
@@ -374,21 +378,21 @@ class GCNRPTracker {
bool isUnitLiveAt(MCRegUnit Unit, SlotIndex SI) const;
// Check if all register units of Reg are currently live in PhysLiveRegs.
- bool allRegUnitsLive(Register Reg) const;
+ bool allRegUnitsLive(MCRegister Reg) const;
// Check if Reg has any killed units at the given slot index.
- bool checkRegKilled(Register Reg, SlotIndex SI) const;
+ bool checkRegKilled(MCRegister Reg, SlotIndex SI) const;
// Check if Reg has any killed units and erase them from PhysLiveRegs.
- bool eraseKilledUnits(Register Reg, SlotIndex SI);
+ bool eraseKilledUnits(MCRegister Reg, SlotIndex SI);
// Erase all live units of Reg from PhysLiveRegs.
// Returns true if any unit was live (and thus erased).
- bool eraseAllLiveUnits(Register Reg);
+ bool eraseAllLiveUnits(MCRegister Reg);
// Insert all not-live units of Reg into PhysLiveRegs.
// Returns true if any unit was not live (and thus inserted).
- bool insertAllNotLiveUnits(Register Reg);
+ bool insertAllNotLiveUnits(MCRegister Reg);
public:
// Enable physical register tracking only if both GCNTrackers and
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
index 059d930dc8e45..2d2ff828d3ef5 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir
@@ -11507,19 +11507,19 @@ body: |
; GFX908-GCNTRACKERS-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_27:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF28]], implicit $exec, implicit $mode
; GFX908-GCNTRACKERS-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_28:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF29]], implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX908-GCNTRACKERS-NEXT: S_BRANCH %bb.1
; GFX908-GCNTRACKERS-NEXT: {{ $}}
; GFX908-GCNTRACKERS-NEXT: bb.1:
; GFX908-GCNTRACKERS-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 255
; GFX908-GCNTRACKERS-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 [[S_MOV_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]], implicit [[V_CVT_I32_F32_e32_9]]
- ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_30]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_29:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_30:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[DEF30]], implicit $exec, implicit $mode
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_29]], implicit [[V_CVT_I32_F32_e32_30]], implicit [[DEF30]]
; GFX908-GCNTRACKERS-NEXT: [[V_CVT_I32_F32_e32_31:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_29]], implicit [[DEF30]]
+ ; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_31]], implicit [[V_CVT_I32_F32_e32_28]], implicit [[DEF29]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]], implicit [[V_CVT_I32_F32_e32_10]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_3]], implicit [[V_CVT_I32_F32_e32_11]]
; GFX908-GCNTRACKERS-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_4]], implicit [[V_CVT_I32_F32_e32_12]]
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 810f478b3f12a..14bd5a9477acc 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -799,7 +799,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0
; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s54, 15
; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
-; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v23, s55, 16
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
@@ -1826,7 +1826,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s52, 13
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s53, 14
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s54, 15
-; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v21, s55, 16
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc
@@ -2916,12 +2916,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s50, 11
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s51, 12
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s52, 13
-; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s53, 14
-; GFX942-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s0, s32, 64
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s54, 15
; GFX942-GCNTRACKERS-NEXT: v_mov_b32_e32 v0, s0
-; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[0:1], 0, exec
; GFX942-GCNTRACKERS-NEXT: v_writelane_b32 v22, s55, 16
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; use alloca0 v0
@@ -2929,7 +2927,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX942-GCNTRACKERS-NEXT: ;;#ASMEND
+; GFX942-GCNTRACKERS-NEXT: s_add_i32 s58, s32, 0x4240
; GFX942-GCNTRACKERS-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54
+; GFX942-GCNTRACKERS-NEXT: s_and_b64 s[60:61], 0, exec
; GFX942-GCNTRACKERS-NEXT: s_mov_b32 s54, s58
; GFX942-GCNTRACKERS-NEXT: ;;#ASMSTART
; GFX942-GCNTRACKERS-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 0d81a11243ccf..252875377d1ea 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -19,13 +19,14 @@
; SCHED: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 97
;
; SCHED-GCNTRACKERS-LABEL: spill:%bb.0 entry
-; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 99
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 193
; SCHED-GCNTRACKERS: Pressure after scheduling: VGPRs: 0 AGPRs: 0, SGPRs: 98
;
; NOTE: GCN Trackers now track pressure from both virtual and physical registers.
; The GCN tracker now matches the generic tracker's VGPR count (1 VGPR).
-; The SGPR pressure is still slightly higher (98 vs 97) due to summing physical
-; register pressure from inline asm constraints with virtual register pressure.
+; When a live range is not found for a physical regunit, we conservatively
+; assume the unit is live, so Region SGPR pressure can be higher (193 vs 98).
+; Pressure after scheduling remains 98 vs 97 due to physical register tracking.
define amdgpu_kernel void @spill(ptr addrspace(1) %arg, i32 %cnd) #0 {
entry:
@@ -269,7 +270,7 @@ bb3:
; SCHED: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 97
;
; SCHED-GCNTRACKERS-LABEL: spill_func:%bb.0 entry
-; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 98
+; SCHED-GCNTRACKERS: Region register pressure: VGPRs: 0 AGPRs: 0, SGPRs: 192
define void @spill_func(ptr addrspace(1) %arg) #0 {
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 084acee121f78..fdb10483d9a42 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -9,77 +9,68 @@
; Test that GCN trackers correctly track physical register pressure from inline asm
; GCN-DEBUG-LABEL: test_single_physreg
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG-LABEL: test_single_physreg
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
; GCN-NOPHYS-DEBUG-LABEL: test_single_physreg
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
- store i32 0, ptr addrspace(1) %out
+ store i32 %val, ptr addrspace(1) %out
ret void
}
; Test multiple physical registers
; GCN-DEBUG-LABEL: test_multiple_physregs
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG-LABEL: test_multiple_physregs
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
; GCN-NOPHYS-DEBUG-LABEL: test_multiple_physregs
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
- store i32 0, ptr addrspace(1) %out
+ %r0 = extractvalue { i32, i32 } %result, 0
+ %r1 = extractvalue { i32, i32 } %result, 1
+ %sum = add i32 %r0, %r1
+ store i32 %sum, ptr addrspace(1) %out
ret void
}
; Test physical register with virtual registers
; GCN-DEBUG-LABEL: test_physreg_with_vreg
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
; GENERIC-DEBUG-LABEL: test_physreg_with_vreg
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
; GCN-NOPHYS-DEBUG-LABEL: test_physreg_with_vreg
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
entry:
%asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
%val = load i32, ptr addrspace(1) %in
- store i32 %val, ptr addrspace(1) %out
- ret void
-}
-
-; Test that we don't inflate pressure when not using GCN trackers
-
-; GCN-DEBUG-LABEL: test_no_inflation
-
-; GENERIC-DEBUG-LABEL: test_no_inflation
-
-; GCN-NOPHYS-DEBUG-LABEL: test_no_inflation
-
-define amdgpu_kernel void @test_no_inflation() {
-entry:
+ %sum = add i32 %asm_val, %val
+ store i32 %sum, ptr addrspace(1) %out
ret void
}
@@ -107,45 +98,95 @@ entry:
; Test physical register input
; GCN-DEBUG-LABEL: test_physreg_input
-; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
; GENERIC-DEBUG-LABEL: test_physreg_input
-; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
; GCN-NOPHYS-DEBUG-LABEL: test_physreg_input
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 4, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
entry:
%val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"()
- store i32 0, ptr addrspace(1) %out
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register pressure for tuple (64-bit) registers.
+; GCN tracker counts the 2 SGPRs.
+
+; GCN-DEBUG-LABEL: test_tuple_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+; GENERIC-DEBUG-LABEL: test_tuple_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_tuple_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_tuple_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i64 asm sideeffect "s_mov_b64 $0, 0", "={s[10:11]}"()
+ %lo = trunc i64 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register pressure for 128-bit tuple.
+; GCN tracker counts the 4 SGPRs.
+
+; GCN-DEBUG-LABEL: test_tuple128_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_tuple128_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+; GCN-NOPHYS-DEBUG-LABEL: test_tuple128_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+define amdgpu_kernel void @test_tuple128_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i128 asm sideeffect "s_mov_b64 $0, 0; s_mov_b64 $0+2, 0", "={s[8:11]}"()
+ %lo = trunc i128 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
ret void
}
; Test virtual and physical register overlap
-; GCN-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 18
-; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 18
+; GCN-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 16, LVGPR WT: 0, LSGPR WT: 16
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
-; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_overlap
-; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
-define amdgpu_kernel void @test_vreg_and_physreg_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
+define amdgpu_kernel void @test_vreg_and_physreg_live_range_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
%val1 = load i32, ptr addrspace(1) %in1
%val2 = load i32, ptr addrspace(1) %in2
%sum = add i32 %val1, %val2
- store i32 %sum, ptr addrspace(1) %out
+ %r0 = extractvalue { i32, i32 } %result, 0
+ %r1 = extractvalue { i32, i32 } %result, 1
+ %with_asm = add i32 %sum, %r0
+ %final = add i32 %with_asm, %r1
+ store i32 %final, ptr addrspace(1) %out
ret void
}
@@ -155,10 +196,10 @@ entry:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: s_mov_b32 s10, 0
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
@@ -169,12 +210,13 @@ entry:
; GCN-LABEL: test_multiple_physregs:
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_add_i32 s4, s10, s11
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
@@ -197,19 +239,13 @@ entry:
; GCN-NEXT: s_mov_b32 s4, s2
; GCN-NEXT: s_mov_b32 s5, s3
; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT: s_endpgm
; GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 11
+; GCN: TotalNumSgprs: 13
; GCN: NumVgprs: 1
-; GCN-LABEL: test_no_inflation:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_endpgm
-; GCN: .set test_no_inflation.numbered_sgpr, 0
-; GCN: TotalNumSgprs: 0
-; GCN: NumVgprs: 0
-
; GCN-LABEL: test_early_clobber:
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
@@ -231,10 +267,10 @@ entry:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s11
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
@@ -242,10 +278,28 @@ entry:
; GCN: TotalNumSgprs: 12
; GCN: NumVgprs: 1
-; GCN-LABEL: test_vreg_and_physreg_overlap:
+; GCN-LABEL: test_tuple_physreg:
+; GCN: ;;#ASMSTART
+; GCN: s_mov_b64 s[10:11], 0
+; GCN: ;;#ASMEND
+; GCN: v_mov_b32_e32 v0, s10
+; GCN: .set test_tuple_physreg.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_tuple128_physreg:
+; GCN: ;;#ASMSTART
+; GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN: ;;#ASMEND
+; GCN: v_mov_b32_e32 v0, s8
+; GCN: .set test_tuple128_physreg.numbered_sgpr, 12
+; GCN: TotalNumSgprs: 12
+; GCN: NumVgprs: 1
+
+; GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
; GCN-NEXT: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: ;;#ASMSTART
@@ -260,14 +314,16 @@ entry:
; GCN-NEXT: s_mov_b32 s3, s7
; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NEXT: s_mov_b32 s10, s6
-; GCN-NEXT: s_mov_b32 s11, s7
+; GCN-NEXT: s_mov_b32 s14, s6
+; GCN-NEXT: s_mov_b32 s15, s7
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
; GCN-NEXT: s_endpgm
-; GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 14
+; GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
+; GCN: TotalNumSgprs: 18
; GCN: NumVgprs: 2
; Verify assembly output with GCN trackers but physical register tracking disabled (same as GCN)
@@ -276,10 +332,10 @@ entry:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
; GCN-NOPHYS-NEXT: ;;#ASMSTART
; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOPHYS-NEXT: s_endpgm
@@ -290,12 +346,13 @@ entry:
; GCN-NOPHYS-LABEL: test_multiple_physregs:
; GCN-NOPHYS-NEXT: ; %bb.0:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
; GCN-NOPHYS-NEXT: ;;#ASMSTART
; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_add_i32 s4, s10, s11
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s4
; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOPHYS-NEXT: s_endpgm
@@ -318,19 +375,13 @@ entry:
; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GCN-NOPHYS-NEXT: s_endpgm
; GCN-NOPHYS: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 11
+; GCN-NOPHYS: TotalNumSgprs: 13
; GCN-NOPHYS: NumVgprs: 1
-; GCN-NOPHYS-LABEL: test_no_inflation:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_no_inflation.numbered_sgpr, 0
-; GCN-NOPHYS: TotalNumSgprs: 0
-; GCN-NOPHYS: NumVgprs: 0
-
; GCN-NOPHYS-LABEL: test_early_clobber:
; GCN-NOPHYS-NEXT: ; %bb.0:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
@@ -352,10 +403,10 @@ entry:
; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, 0
; GCN-NOPHYS-NEXT: ;;#ASMSTART
; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s11
; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NOPHYS-NEXT: s_endpgm
@@ -363,10 +414,28 @@ entry:
; GCN-NOPHYS: TotalNumSgprs: 12
; GCN-NOPHYS: NumVgprs: 1
-; GCN-NOPHYS-LABEL: test_vreg_and_physreg_overlap:
+; GCN-NOPHYS-LABEL: test_tuple_physreg:
+; GCN-NOPHYS: ;;#ASMSTART
+; GCN-NOPHYS: s_mov_b64 s[10:11], 0
+; GCN-NOPHYS: ;;#ASMEND
+; GCN-NOPHYS: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS: .set test_tuple_physreg.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_tuple128_physreg:
+; GCN-NOPHYS: ;;#ASMSTART
+; GCN-NOPHYS: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN-NOPHYS: ;;#ASMEND
+; GCN-NOPHYS: v_mov_b32_e32 v0, s8
+; GCN-NOPHYS: .set test_tuple128_physreg.numbered_sgpr, 12
+; GCN-NOPHYS: TotalNumSgprs: 12
+; GCN-NOPHYS: NumVgprs: 1
+
+; GCN-NOPHYS-LABEL: test_vreg_and_physreg_live_range_overlap:
; GCN-NOPHYS-NEXT: ; %bb.0:
; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
; GCN-NOPHYS-NEXT: ;;#ASMSTART
@@ -381,14 +450,16 @@ entry:
; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, s6
-; GCN-NOPHYS-NEXT: s_mov_b32 s11, s7
+; GCN-NOPHYS-NEXT: s_mov_b32 s14, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s15, s7
; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[12:15], 0
; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 14
+; GCN-NOPHYS: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
+; GCN-NOPHYS: TotalNumSgprs: 18
; GCN-NOPHYS: NumVgprs: 2
; Verify assembly output without GCN trackers (should be identical)
@@ -397,10 +468,10 @@ entry:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
; NO-GCN-NEXT: ;;#ASMSTART
; NO-GCN-NEXT: s_mov_b32 s10, 0
; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NO-GCN-NEXT: s_endpgm
@@ -411,12 +482,13 @@ entry:
; NO-GCN-LABEL: test_multiple_physregs:
; NO-GCN-NEXT: ; %bb.0:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
; NO-GCN-NEXT: ;;#ASMSTART
; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_add_i32 s4, s10, s11
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s4
; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NO-GCN-NEXT: s_endpgm
@@ -439,19 +511,13 @@ entry:
; NO-GCN-NEXT: s_mov_b32 s4, s2
; NO-GCN-NEXT: s_mov_b32 s5, s3
; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
; NO-GCN-NEXT: s_endpgm
; NO-GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 11
+; NO-GCN: TotalNumSgprs: 13
; NO-GCN: NumVgprs: 1
-; NO-GCN-LABEL: test_no_inflation:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_no_inflation.numbered_sgpr, 0
-; NO-GCN: TotalNumSgprs: 0
-; NO-GCN: NumVgprs: 0
-
; NO-GCN-LABEL: test_early_clobber:
; NO-GCN-NEXT: ; %bb.0:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
@@ -473,10 +539,10 @@ entry:
; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, 0
; NO-GCN-NEXT: ;;#ASMSTART
; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s11
; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NO-GCN-NEXT: s_endpgm
@@ -484,10 +550,28 @@ entry:
; NO-GCN: TotalNumSgprs: 12
; NO-GCN: NumVgprs: 1
-; NO-GCN-LABEL: test_vreg_and_physreg_overlap:
+; NO-GCN-LABEL: test_tuple_physreg:
+; NO-GCN: ;;#ASMSTART
+; NO-GCN: s_mov_b64 s[10:11], 0
+; NO-GCN: ;;#ASMEND
+; NO-GCN: v_mov_b32_e32 v0, s10
+; NO-GCN: .set test_tuple_physreg.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_tuple128_physreg:
+; NO-GCN: ;;#ASMSTART
+; NO-GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; NO-GCN: ;;#ASMEND
+; NO-GCN: v_mov_b32_e32 v0, s8
+; NO-GCN: .set test_tuple128_physreg.numbered_sgpr, 12
+; NO-GCN: TotalNumSgprs: 12
+; NO-GCN: NumVgprs: 1
+
+; NO-GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
; NO-GCN-NEXT: ; %bb.0:
; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; NO-GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; NO-GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
; NO-GCN-NEXT: s_mov_b32 s6, -1
; NO-GCN-NEXT: ;;#ASMSTART
@@ -502,12 +586,14 @@ entry:
; NO-GCN-NEXT: s_mov_b32 s3, s7
; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; NO-GCN-NEXT: s_mov_b32 s10, s6
-; NO-GCN-NEXT: s_mov_b32 s11, s7
+; NO-GCN-NEXT: s_mov_b32 s14, s6
+; NO-GCN-NEXT: s_mov_b32 s15, s7
; NO-GCN-NEXT: s_waitcnt vmcnt(0)
; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_vreg_and_physreg_overlap.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 14
+; NO-GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
+; NO-GCN: TotalNumSgprs: 18
; NO-GCN: NumVgprs: 2
>From 9acf77d84056cb7584bf6da4fc879ff36082545f Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 24 Mar 2026 17:32:26 -0500
Subject: [PATCH 3/7] [AMDGPU] Addressed reviewer comments.
Make some pressure inc() logic unconditional to match existing code.
Un-invert some logic and naming for regunit management.
Use auto-gen script for new test while maintaining debug messages.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 22 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 6 +-
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 788 +++++++++---------
3 files changed, 386 insertions(+), 430 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index efd4599c89509..75a332985e467 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -115,10 +115,8 @@ void GCNRegPressure::inc(MCRegister Reg, bool IsAdd,
if (TRI->getRegSizeInBits(*RC) != 32) {
unsigned TupleIdx = TOTAL_KINDS + RegKind;
Value[TupleIdx] += Sign * TRI->getRegClassWeight(RC).RegWeight;
- Value[RegKind] += Sign * static_cast<int>(NumRegs);
- } else {
- Value[RegKind] += Sign;
}
+ Value[RegKind] += Sign * static_cast<int>(NumRegs);
}
namespace {
@@ -546,16 +544,16 @@ bool GCNRPTracker::eraseAllLiveUnits(MCRegister Reg) {
return WasLive;
}
-bool GCNRPTracker::insertAllNotLiveUnits(MCRegister Reg) {
+bool GCNRPTracker::insertIfNotLive(MCRegister Reg) {
assert(MRI && "MRI not initialized");
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
const BitVector &Units = PhysLiveRegs.getBitVector();
- bool WasNotLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
+ bool NewlyLive = llvm::any_of(TRI->regunits(Reg), [&](MCRegUnit Unit) {
return !Units.test(static_cast<unsigned>(Unit));
});
- if (WasNotLive)
+ if (NewlyLive)
PhysLiveRegs.addReg(Reg);
- return WasNotLive;
+ return NewlyLive;
}
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
@@ -733,13 +731,11 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
Register Reg = MO.getReg();
if (!MRI->isAllocatable(Reg))
continue;
- // Check if any unit of this register was not live before and if so,
- // insert all of the regunits into PhysLiveRegs.
- bool WasNotLive = insertAllNotLiveUnits(Reg.asMCReg());
+ // Insert regunits into PhysLiveRegs if not already live.
+ bool NewlyLive = insertIfNotLive(Reg.asMCReg());
- // Update pressure once per register if any unit of this register was not
- // live before.
- if (WasNotLive)
+ // Update pressure once per register if it became newly live.
+ if (NewlyLive)
CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 04000e33aae28..90198e602b68a 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -390,9 +390,9 @@ class GCNRPTracker {
// Returns true if any unit was live (and thus erased).
bool eraseAllLiveUnits(MCRegister Reg);
- // Insert all not-live units of Reg into PhysLiveRegs.
- // Returns true if any unit was not live (and thus inserted).
- bool insertAllNotLiveUnits(MCRegister Reg);
+ // Insert units of Reg into PhysLiveRegs if not already live.
+ // Returns true if any unit was newly inserted.
+ bool insertIfNotLive(MCRegister Reg);
public:
// Enable physical register tracking only if both GCNTrackers and
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index fdb10483d9a42..4ead3098a7f68 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -1,9 +1,10 @@
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
-; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 < %s | FileCheck --check-prefix=GCN-NOPHYS %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN %s
+; RUN: FileCheck --check-prefix=GCN-DEBUG %s < %t
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=NO-GCN %s
+; RUN: FileCheck --check-prefix=GENERIC-DEBUG %s < %t
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN-NOPHYS %s
+; RUN: FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s < %t
; REQUIRES: asserts
; Test that GCN trackers correctly track physical register pressure from inline asm
@@ -21,6 +22,44 @@
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
+; GCN-LABEL: test_single_physreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_single_physreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_single_physreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
store i32 %val, ptr addrspace(1) %out
@@ -42,6 +81,47 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
+; GCN-LABEL: test_multiple_physregs:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_add_i32 s4, s10, s11
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_multiple_physregs:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_add_i32 s4, s10, s11
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s4
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_multiple_physregs:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_add_i32 s4, s10, s11
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
%r0 = extractvalue { i32, i32 } %result, 0
@@ -66,6 +146,62 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+; GCN-LABEL: test_physreg_with_vreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_mov_b32 s4, s2
+; GCN-NEXT: s_mov_b32 s5, s3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_physreg_with_vreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_mov_b32 s4, s2
+; NO-GCN-NEXT: s_mov_b32 s5, s3
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_physreg_with_vreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
%val = load i32, ptr addrspace(1) %in
@@ -89,6 +225,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) {
+; GCN-LABEL: test_early_clobber:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_early_clobber:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_early_clobber:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"()
store i32 %val, ptr addrspace(1) %out
@@ -110,6 +284,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
+; GCN-LABEL: test_physreg_input:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_physreg_input:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s11
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_physreg_input:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s11
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"()
store i32 %val, ptr addrspace(1) %out
@@ -132,6 +344,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
define amdgpu_kernel void @test_tuple_physreg(ptr addrspace(1) %out) {
+; GCN-LABEL: test_tuple_physreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b64 s[10:11], 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_tuple_physreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b64 s[10:11], 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_tuple_physreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b64 s[10:11], 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i64 asm sideeffect "s_mov_b64 $0, 0", "={s[10:11]}"()
%lo = trunc i64 %val to i32
@@ -155,6 +405,44 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
define amdgpu_kernel void @test_tuple128_physreg(ptr addrspace(1) %out) {
+; GCN-LABEL: test_tuple128_physreg:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_tuple128_physreg:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s8
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_tuple128_physreg:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%val = call i128 asm sideeffect "s_mov_b64 $0, 0; s_mov_b64 $0+2, 0", "={s[8:11]}"()
%lo = trunc i128 %val to i32
@@ -177,6 +465,86 @@ entry:
; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
define amdgpu_kernel void @test_vreg_and_physreg_live_range_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
+; GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: s_mov_b32 s0, s2
+; GCN-NEXT: s_mov_b32 s1, s3
+; GCN-NEXT: s_mov_b32 s2, s6
+; GCN-NEXT: s_mov_b32 s3, s7
+; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NEXT: s_mov_b32 s14, s6
+; GCN-NEXT: s_mov_b32 s15, s7
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; NO-GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s6, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: s_mov_b32 s4, s0
+; NO-GCN-NEXT: s_mov_b32 s5, s1
+; NO-GCN-NEXT: s_mov_b32 s0, s2
+; NO-GCN-NEXT: s_mov_b32 s1, s3
+; NO-GCN-NEXT: s_mov_b32 s2, s6
+; NO-GCN-NEXT: s_mov_b32 s3, s7
+; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; NO-GCN-NEXT: s_mov_b32 s14, s6
+; NO-GCN-NEXT: s_mov_b32 s15, s7
+; NO-GCN-NEXT: s_waitcnt vmcnt(0)
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_vreg_and_physreg_live_range_overlap:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
+; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
+; GCN-NOPHYS-NEXT: s_mov_b32 s0, s2
+; GCN-NOPHYS-NEXT: s_mov_b32 s1, s3
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
+; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
+; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_mov_b32 s14, s6
+; GCN-NOPHYS-NEXT: s_mov_b32 s15, s7
+; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s11, v0
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[12:15], 0
+; GCN-NOPHYS-NEXT: s_endpgm
entry:
%result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
%val1 = load i32, ptr addrspace(1) %in1
@@ -189,411 +557,3 @@ entry:
store i32 %final, ptr addrspace(1) %out
ret void
}
-
-; Verify assembly output for GCN trackers
-; GCN-LABEL: test_single_physreg:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_single_physreg.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 11
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_multiple_physregs:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_add_i32 s4, s10, s11
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_multiple_physregs.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_physreg_with_vreg:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NEXT: s_mov_b32 s6, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s4, s0
-; GCN-NEXT: s_mov_b32 s5, s1
-; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NEXT: s_mov_b32 s4, s2
-; GCN-NEXT: s_mov_b32 s5, s3
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 13
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_early_clobber:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_early_clobber.numbered_sgpr, 11
-; GCN: TotalNumSgprs: 11
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_physreg_input:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NEXT: s_mov_b32 s2, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_mov_b32_e32 v0, s11
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_physreg_input.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_tuple_physreg:
-; GCN: ;;#ASMSTART
-; GCN: s_mov_b64 s[10:11], 0
-; GCN: ;;#ASMEND
-; GCN: v_mov_b32_e32 v0, s10
-; GCN: .set test_tuple_physreg.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_tuple128_physreg:
-; GCN: ;;#ASMSTART
-; GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
-; GCN: ;;#ASMEND
-; GCN: v_mov_b32_e32 v0, s8
-; GCN: .set test_tuple128_physreg.numbered_sgpr, 12
-; GCN: TotalNumSgprs: 12
-; GCN: NumVgprs: 1
-
-; GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
-; GCN-NEXT: ; %bb.0:
-; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
-; GCN-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NEXT: s_mov_b32 s6, -1
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_mov_b32 s4, s0
-; GCN-NEXT: s_mov_b32 s5, s1
-; GCN-NEXT: s_mov_b32 s0, s2
-; GCN-NEXT: s_mov_b32 s1, s3
-; GCN-NEXT: s_mov_b32 s2, s6
-; GCN-NEXT: s_mov_b32 s3, s7
-; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NEXT: s_mov_b32 s14, s6
-; GCN-NEXT: s_mov_b32 s15, s7
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
-; GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
-; GCN-NEXT: s_endpgm
-; GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
-; GCN: TotalNumSgprs: 18
-; GCN: NumVgprs: 2
-
-; Verify assembly output with GCN trackers but physical register tracking disabled (same as GCN)
-; GCN-NOPHYS-LABEL: test_single_physreg:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_single_physreg.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 11
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_multiple_physregs:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: s_add_i32 s4, s10, s11
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s4
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_multiple_physregs.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_physreg_with_vreg:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
-; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
-; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NOPHYS-NEXT: s_mov_b32 s4, s2
-; GCN-NOPHYS-NEXT: s_mov_b32 s5, s3
-; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_physreg_with_vreg.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 13
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_early_clobber:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_early_clobber.numbered_sgpr, 11
-; GCN-NOPHYS: TotalNumSgprs: 11
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_physreg_input:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s11
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_physreg_input.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_tuple_physreg:
-; GCN-NOPHYS: ;;#ASMSTART
-; GCN-NOPHYS: s_mov_b64 s[10:11], 0
-; GCN-NOPHYS: ;;#ASMEND
-; GCN-NOPHYS: v_mov_b32_e32 v0, s10
-; GCN-NOPHYS: .set test_tuple_physreg.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_tuple128_physreg:
-; GCN-NOPHYS: ;;#ASMSTART
-; GCN-NOPHYS: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
-; GCN-NOPHYS: ;;#ASMEND
-; GCN-NOPHYS: v_mov_b32_e32 v0, s8
-; GCN-NOPHYS: .set test_tuple128_physreg.numbered_sgpr, 12
-; GCN-NOPHYS: TotalNumSgprs: 12
-; GCN-NOPHYS: NumVgprs: 1
-
-; GCN-NOPHYS-LABEL: test_vreg_and_physreg_live_range_overlap:
-; GCN-NOPHYS-NEXT: ; %bb.0:
-; GCN-NOPHYS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-NOPHYS-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
-; GCN-NOPHYS-NEXT: s_mov_b32 s7, 0xf000
-; GCN-NOPHYS-NEXT: s_mov_b32 s6, -1
-; GCN-NOPHYS-NEXT: ;;#ASMSTART
-; GCN-NOPHYS-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; GCN-NOPHYS-NEXT: ;;#ASMEND
-; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NOPHYS-NEXT: s_mov_b32 s4, s0
-; GCN-NOPHYS-NEXT: s_mov_b32 s5, s1
-; GCN-NOPHYS-NEXT: s_mov_b32 s0, s2
-; GCN-NOPHYS-NEXT: s_mov_b32 s1, s3
-; GCN-NOPHYS-NEXT: s_mov_b32 s2, s6
-; GCN-NOPHYS-NEXT: s_mov_b32 s3, s7
-; GCN-NOPHYS-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; GCN-NOPHYS-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; GCN-NOPHYS-NEXT: s_mov_b32 s14, s6
-; GCN-NOPHYS-NEXT: s_mov_b32 s15, s7
-; GCN-NOPHYS-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; GCN-NOPHYS-NEXT: v_add_i32_e32 v0, vcc, s11, v0
-; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[12:15], 0
-; GCN-NOPHYS-NEXT: s_endpgm
-; GCN-NOPHYS: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
-; GCN-NOPHYS: TotalNumSgprs: 18
-; GCN-NOPHYS: NumVgprs: 2
-
-; Verify assembly output without GCN trackers (should be identical)
-; NO-GCN-LABEL: test_single_physreg:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_single_physreg.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 11
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_multiple_physregs:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: s_add_i32 s4, s10, s11
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s4
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_multiple_physregs.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_physreg_with_vreg:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s6, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: s_mov_b32 s4, s0
-; NO-GCN-NEXT: s_mov_b32 s5, s1
-; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; NO-GCN-NEXT: s_mov_b32 s4, s2
-; NO-GCN-NEXT: s_mov_b32 s5, s3
-; NO-GCN-NEXT: s_waitcnt vmcnt(0)
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_physreg_with_vreg.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 13
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_early_clobber:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_early_clobber.numbered_sgpr, 11
-; NO-GCN: TotalNumSgprs: 11
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_physreg_input:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s2, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 5; s_add_u32 s11, s10, 1
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: v_mov_b32_e32 v0, s11
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_physreg_input.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_tuple_physreg:
-; NO-GCN: ;;#ASMSTART
-; NO-GCN: s_mov_b64 s[10:11], 0
-; NO-GCN: ;;#ASMEND
-; NO-GCN: v_mov_b32_e32 v0, s10
-; NO-GCN: .set test_tuple_physreg.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_tuple128_physreg:
-; NO-GCN: ;;#ASMSTART
-; NO-GCN: s_mov_b64 s[8:11], 0; s_mov_b64 s[8:11]+2, 0
-; NO-GCN: ;;#ASMEND
-; NO-GCN: v_mov_b32_e32 v0, s8
-; NO-GCN: .set test_tuple128_physreg.numbered_sgpr, 12
-; NO-GCN: TotalNumSgprs: 12
-; NO-GCN: NumVgprs: 1
-
-; NO-GCN-LABEL: test_vreg_and_physreg_live_range_overlap:
-; NO-GCN-NEXT: ; %bb.0:
-; NO-GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; NO-GCN-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
-; NO-GCN-NEXT: s_mov_b32 s7, 0xf000
-; NO-GCN-NEXT: s_mov_b32 s6, -1
-; NO-GCN-NEXT: ;;#ASMSTART
-; NO-GCN-NEXT: s_mov_b32 s10, 0; s_mov_b32 s11, 1
-; NO-GCN-NEXT: ;;#ASMEND
-; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
-; NO-GCN-NEXT: s_mov_b32 s4, s0
-; NO-GCN-NEXT: s_mov_b32 s5, s1
-; NO-GCN-NEXT: s_mov_b32 s0, s2
-; NO-GCN-NEXT: s_mov_b32 s1, s3
-; NO-GCN-NEXT: s_mov_b32 s2, s6
-; NO-GCN-NEXT: s_mov_b32 s3, s7
-; NO-GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0
-; NO-GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; NO-GCN-NEXT: s_mov_b32 s14, s6
-; NO-GCN-NEXT: s_mov_b32 s15, s7
-; NO-GCN-NEXT: s_waitcnt vmcnt(0)
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s10, v0
-; NO-GCN-NEXT: v_add_i32_e32 v0, vcc, s11, v0
-; NO-GCN-NEXT: buffer_store_dword v0, off, s[12:15], 0
-; NO-GCN-NEXT: s_endpgm
-; NO-GCN: .set test_vreg_and_physreg_live_range_overlap.numbered_sgpr, 16
-; NO-GCN: TotalNumSgprs: 18
-; NO-GCN: NumVgprs: 2
>From 621f0a6cadc00328a7d7108a8efc2bd0bbcf3faf Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Wed, 25 Mar 2026 00:44:04 -0500
Subject: [PATCH 4/7] Added test for early clobber with a tuple register.
---
.../AMDGPU/schedule-gcn-physreg-pressure.ll | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
index 4ead3098a7f68..7927712f36b5c 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-gcn-physreg-pressure.ll
@@ -269,6 +269,77 @@ entry:
ret void
}
+; Test an early-clobber constraint on a tuple (64-bit) register output.
+; The input (s12) and the early-clobber output (s[10:11]) are separate registers with distinct live ranges.
+
+; GCN-DEBUG-LABEL: test_early_clobber_tuple
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 8
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 8
+
+; GENERIC-DEBUG-LABEL: test_early_clobber_tuple
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_early_clobber_tuple
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_early_clobber_tuple(ptr addrspace(1) %out) {
+; GCN-LABEL: test_early_clobber_tuple:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b32 s12, 42
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: s_mov_b64 s[10:11], s12
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; NO-GCN-LABEL: test_early_clobber_tuple:
+; NO-GCN: ; %bb.0: ; %entry
+; NO-GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; NO-GCN-NEXT: s_mov_b32 s3, 0xf000
+; NO-GCN-NEXT: s_mov_b32 s2, -1
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b32 s12, 42
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: ;;#ASMSTART
+; NO-GCN-NEXT: s_mov_b64 s[10:11], s12
+; NO-GCN-NEXT: ;;#ASMEND
+; NO-GCN-NEXT: v_mov_b32_e32 v0, s10
+; NO-GCN-NEXT: s_waitcnt lgkmcnt(0)
+; NO-GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; NO-GCN-NEXT: s_endpgm
+;
+; GCN-NOPHYS-LABEL: test_early_clobber_tuple:
+; GCN-NOPHYS: ; %bb.0: ; %entry
+; GCN-NOPHYS-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NOPHYS-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NOPHYS-NEXT: s_mov_b32 s2, -1
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b32 s12, 42
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: ;;#ASMSTART
+; GCN-NOPHYS-NEXT: s_mov_b64 s[10:11], s12
+; GCN-NOPHYS-NEXT: ;;#ASMEND
+; GCN-NOPHYS-NEXT: v_mov_b32_e32 v0, s10
+; GCN-NOPHYS-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NOPHYS-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NOPHYS-NEXT: s_endpgm
+entry:
+ %in = call i32 asm sideeffect "s_mov_b32 $0, 42", "={s12}"()
+ %val = call i64 asm sideeffect "s_mov_b64 $0, $1", "=&{s[10:11]},{s12}"(i32 %in)
+ %lo = trunc i64 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
+ ret void
+}
+
; Test physical register input
; GCN-DEBUG-LABEL: test_physreg_input
>From 974d66324b1a50c15a85c14510910f6bc1cf7643 Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 30 Mar 2026 17:50:04 -0500
Subject: [PATCH 5/7] Folded multiple RUN lines into one while maintaining
debug msgs.
---
.../CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
index 252875377d1ea..98e04ea74a993 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 < %s | FileCheck --check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN-GCNTRACKERS %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=SCHED-GCNTRACKERS %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN %s
+; RUN: FileCheck --check-prefix=SCHED %s < %t
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-s-branch-bits=5 -amdgpu-long-branch-factor=0 -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2> %t | FileCheck --check-prefix=GCN-GCNTRACKERS %s
+; RUN: FileCheck --check-prefix=SCHED-GCNTRACKERS %s < %t
; REQUIRES: asserts
; CHECK-LABEL: {{^}}spill:
; GCN: NumSgprs: 104
>From 3bc59994ab93b18c10379a8e51b0ca41d615fbeb Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Mon, 30 Mar 2026 19:35:09 -0500
Subject: [PATCH 6/7] Added early clobber support to physical register
tracking.
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 75a332985e467..705577ca7291f 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -706,6 +706,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// Track physical register defs and uses (only if enabled).
if (TrackPhysRegs) {
+ GCNRegPressure ECPhysDefPressure;
+ bool HasECPhysDefs = false;
+
// Kill physical register defs (moving backward in upward tracking).
for (const MachineOperand &MO : MI.all_defs()) {
if (!MO.getReg().isPhysical())
@@ -714,6 +717,11 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (!MRI->isAllocatable(Reg))
continue;
+ if (MO.isEarlyClobber()) {
+ ECPhysDefPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
+ HasECPhysDefs = true;
+ }
+
// Check if any unit of this register was live before and if so,
// erase all of the regunits from PhysLiveRegs.
bool WasLive = eraseAllLiveUnits(Reg.asMCReg());
@@ -739,7 +747,10 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
CurPhysPressure.inc(Reg.asMCReg(), /*IsAdd=*/true, *MRI);
}
- MaxPhysPressure = max(MaxPhysPressure, CurPhysPressure);
+ // Early-clobber physical defs are live alongside uses.
+ MaxPhysPressure = HasECPhysDefs ? max(CurPhysPressure + ECPhysDefPressure,
+ MaxPhysPressure)
+ : max(CurPhysPressure, MaxPhysPressure);
}
assert(CurVirtPressure == getRegPressure(*MRI, VirtLiveRegs));
>From 0cd47f525437e333c8528299568f401de37beeee Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Tue, 31 Mar 2026 04:21:00 -0500
Subject: [PATCH 7/7] Added a new test for physical register early clobber.
---
.../regpressure-physreg-early-clobber.mir | 43 +++++++++++++++++++
1 file changed, 43 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir
new file mode 100644
index 0000000000000..516db40a5af76
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regpressure-physreg-early-clobber.mir
@@ -0,0 +1,43 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 %s -filetype=null 2>&1 | FileCheck %s --check-prefix=RPU
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --filetype=null --run-pass=amdgpu-print-rp -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 %s -filetype=null 2>&1 | FileCheck %s --check-prefix=RPU-NOPHYS
+
+# Test that the upward register pressure tracker accounts for early-clobber
+# physical register defs being live at the same time as physical register uses.
+# With physreg tracking, the EC def s[10:11] (2 SGPRs) is live alongside the use
+# s12 (1 SGPR), producing higher max pressure at the INLINEASM instruction.
+
+---
+name: ec_physreg
+tracksRegLiveness: true
+machineFunctionInfo:
+ sgprForEXECCopy: '$sgpr100_sgpr101'
+body: |
+ ; RPU-LABEL: name: ec_physreg
+ ; RPU: bb.0:
+ ; RPU: SGPR VGPR
+ ; RPU: 3 1
+ ; RPU: 5 1 INLINEASM &"s_mov_b64 $0, $1" {{.*}} early-clobber $sgpr10_sgpr11
+ ; RPU: 4 1
+ ;
+ ; RPU-NOPHYS-LABEL: name: ec_physreg
+ ; RPU-NOPHYS: bb.0:
+ ; RPU-NOPHYS: SGPR VGPR
+ ; RPU-NOPHYS: 2 1
+ ; RPU-NOPHYS: 2 1 INLINEASM &"s_mov_b64 $0, $1" {{.*}} early-clobber $sgpr10_sgpr11
+ ; RPU-NOPHYS: 2 1
+ bb.0:
+ liveins: $sgpr8_sgpr9
+
+ %0:sgpr_64(p4) = COPY $sgpr8_sgpr9
+ %1:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4)
+ %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ INLINEASM &"s_mov_b32 $0, 42", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr12
+ %3:sreg_32 = COPY $sgpr12
+ $sgpr12 = COPY %3
+ INLINEASM &"s_mov_b64 $0, $1", 1 /* sideeffect attdialect */, 11 /* regdef-ec */, implicit-def early-clobber $sgpr10_sgpr11, 9 /* reguse */, $sgpr12
+ %4:sreg_64 = COPY $sgpr10_sgpr11
+ %5:sreg_32 = COPY %4.sub0
+ %6:vgpr_32 = COPY %5
+ GLOBAL_STORE_DWORD_SADDR %2, %6, %1, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ S_ENDPGM 0
+...
More information about the llvm-branch-commits
mailing list