[llvm] [AMDGPU][Scheduler] Support for rematerializing SGPRs and AGPRs (PR #140036)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 15 03:15:27 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Lucas Ramirez (lucas-rami)
This adds the ability to rematerialize SGPRs and AGPRs to the scheduler's `PreRARematStage`, which can currently only rematerialize ArchVGPRs. It also fixes a small potential issue in the stage where, when reducing spilling, the target occupancy could be set to a lower-than-expected value if the function had either the "amdgpu-num-sgpr" or the "amdgpu-num-vgpr" attribute set.
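
To make the accounting concrete, below is a minimal, self-contained C++ sketch of the idea the patch extends: compute a region's excess pressure per register class against per-class limits, then credit each rematerialized register against its class until no excess remains. All names, types, and numbers are illustrative only and do not reflect the actual `GCNSchedStrategy` API; the limits are borrowed from the "amdgpu-num-sgpr"="85" / "amdgpu-num-vgpr"="28" test attributes.

```cpp
// Illustrative sketch only (not the real ExcessRP class from
// GCNSchedStrategy.cpp): per-class excess pressure is computed against
// per-class limits, and each rematerialized register is credited against the
// matching class until no excess remains.
#include <algorithm>
#include <cstdio>

struct ExcessPressureSketch {
  unsigned SGPRs = 0;     // excess SGPRs
  unsigned ArchVGPRs = 0; // excess ArchVGPRs
  unsigned AGPRs = 0;     // excess AGPRs

  ExcessPressureSketch(unsigned NumSGPRs, unsigned MaxSGPRs,
                       unsigned NumArchVGPRs, unsigned MaxArchVGPRs,
                       unsigned NumAGPRs, unsigned MaxAGPRs) {
    SGPRs = NumSGPRs > MaxSGPRs ? NumSGPRs - MaxSGPRs : 0;
    ArchVGPRs = NumArchVGPRs > MaxArchVGPRs ? NumArchVGPRs - MaxArchVGPRs : 0;
    AGPRs = NumAGPRs > MaxAGPRs ? NumAGPRs - MaxAGPRs : 0;
  }

  // Credits NumRegs saved registers of one class; returns true if this
  // reduced the corresponding excess.
  static bool save(unsigned &Excess, unsigned NumRegs) {
    if (!Excess)
      return false;
    Excess -= std::min(Excess, NumRegs);
    return true;
  }

  bool saveSGPRs(unsigned N) { return save(SGPRs, N); }
  bool saveArchVGPRs(unsigned N) { return save(ArchVGPRs, N); }
  bool saveAGPRs(unsigned N) { return save(AGPRs, N); }

  // Any excess pressure left in the region?
  explicit operator bool() const { return SGPRs || ArchVGPRs || AGPRs; }
};

int main() {
  // A region using 90 SGPRs with a limit of 85 and 30 ArchVGPRs with a limit
  // of 28 (made-up region, limits as in the attribute tests).
  ExcessPressureSketch Excess(/*NumSGPRs=*/90, /*MaxSGPRs=*/85,
                              /*NumArchVGPRs=*/30, /*MaxArchVGPRs=*/28,
                              /*NumAGPRs=*/0, /*MaxAGPRs=*/28);
  Excess.saveSGPRs(5);     // five SGPR defs rematerialized out of the region
  Excess.saveArchVGPRs(2); // two ArchVGPR defs rematerialized out
  std::printf("excess left: %s\n", Excess ? "yes" : "no"); // prints "no"
  return 0;
}
```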
---
Patch is 95.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140036.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp (+59-64)
- (modified) llvm/lib/Target/AMDGPU/GCNSchedStrategy.h (-2)
- (modified) llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir (+402-72)
- (modified) llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats.mir (+183-183)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a9c891a1f1dd1..6f6abf5a20930 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1702,6 +1702,8 @@ namespace {
/// Models excess register pressure in a region and tracks our progress as we
/// identify rematerialization opportunities.
struct ExcessRP {
+ /// Number of excess SGPRs.
+ unsigned SGPRs = 0;
/// Number of excess ArchVGPRs.
unsigned ArchVGPRs = 0;
/// Number of excess AGPRs.
@@ -1717,8 +1719,13 @@ struct ExcessRP {
bool UnifiedRF;
/// Constructs the excess RP model; determines the excess pressure w.r.t. a
- /// maximum number of allowed VGPRs.
- ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxVGPRs);
+ /// maximum number of allowed SGPRs/VGPRs.
+ ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP, unsigned MaxSGPRs,
+ unsigned MaxVGPRs);
+
+ /// Accounts for \p NumRegs saved SGPRs in the model. Returns whether saving
+ /// these SGPRs helped reduce excess pressure.
+ bool saveSGPRs(unsigned NumRegs) { return saveRegs(SGPRs, NumRegs); }
/// Accounts for \p NumRegs saved ArchVGPRs in the model. If \p
/// UseArchVGPRForAGPRSpill is true, saved ArchVGPRs are used to save excess
@@ -1726,17 +1733,20 @@ struct ExcessRP {
/// saving these ArchVGPRs helped reduce excess pressure.
bool saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill);
- /// Accounts for \p NumRegs saved AGPRS in the model. Returns whether saving
- /// these ArchVGPRs helped reduce excess pressure.
- bool saveAGPRs(unsigned NumRegs);
+ /// Accounts for \p NumRegs saved AGPRs in the model. Returns whether saving
+ /// these AGPRs helped reduce excess pressure.
+ bool saveAGPRs(unsigned NumRegs) {
+ return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
+ }
/// Returns whether there is any excess register pressure.
- operator bool() const { return ArchVGPRs != 0 || AGPRs != 0 || VGPRs != 0; }
+ operator bool() const { return SGPRs || ArchVGPRs || AGPRs || VGPRs; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
friend raw_ostream &operator<<(raw_ostream &OS, const ExcessRP &Excess) {
- OS << Excess.ArchVGPRs << " ArchVGPRs, " << Excess.AGPRs << " AGPRs, and "
- << Excess.VGPRs << " VGPRs (next ArchVGPR aligment in "
+ OS << Excess.SGPRs << " SGPRs, " << Excess.ArchVGPRs << " ArchVGPRs, and "
+ << Excess.AGPRs << " AGPRs (" << Excess.VGPRs
+ << " VGPRs in total, next ArchVGPR alignment in "
<< Excess.ArchVGPRsToAlignment << " registers)\n";
return OS;
}
@@ -1753,12 +1763,17 @@ struct ExcessRP {
} // namespace
ExcessRP::ExcessRP(const GCNSubtarget &ST, const GCNRegPressure &RP,
- unsigned MaxVGPRs)
+ unsigned MaxSGPRs, unsigned MaxVGPRs)
: UnifiedRF(ST.hasGFX90AInsts()) {
+ // Compute excess SGPR pressure.
+ unsigned NumSGPRs = RP.getSGPRNum();
+ if (NumSGPRs > MaxSGPRs)
+ SGPRs = NumSGPRs - MaxSGPRs;
+
+ // Compute excess ArchVGPR/AGPR pressure.
unsigned NumArchVGPRs = RP.getArchVGPRNum();
unsigned NumAGPRs = RP.getAGPRNum();
HasAGPRs = NumAGPRs;
-
if (!UnifiedRF) {
// Non-unified RF. Account for excess pressure for ArchVGPRs and AGPRs
// independently.
@@ -1839,10 +1854,6 @@ bool ExcessRP::saveArchVGPRs(unsigned NumRegs, bool UseArchVGPRForAGPRSpill) {
return Progress;
}
-bool ExcessRP::saveAGPRs(unsigned NumRegs) {
- return saveRegs(AGPRs, NumRegs) || saveRegs(VGPRs, NumRegs);
-}
-
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
@@ -1865,46 +1876,19 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
const unsigned MaxVGPRsIncOcc = ST.getMaxNumVGPRs(DAG.MinOccupancy + 1);
IncreaseOccupancy = WavesPerEU.second > DAG.MinOccupancy;
- auto ClearOptRegionsIf = [&](bool Cond) -> bool {
- if (Cond) {
- // We won't try to increase occupancy.
- IncreaseOccupancy = false;
- OptRegions.clear();
- }
- return Cond;
- };
-
// Collect optimizable regions. If there is spilling in any region we will
- // just try to reduce ArchVGPR spilling. Otherwise we will try to increase
- // occupancy by one in the whole function.
+ // just try to reduce spilling. Otherwise we will try to increase occupancy by
+ // one in the whole function.
for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
GCNRegPressure &RP = DAG.Pressure[I];
-
- // Check whether SGPR pressures prevents us from eliminating spilling.
- unsigned NumSGPRs = RP.getSGPRNum();
- if (NumSGPRs > MaxSGPRsNoSpill)
- ClearOptRegionsIf(IncreaseOccupancy);
-
- ExcessRP Excess(ST, RP, MaxVGPRsNoSpill);
- if (Excess) {
- ClearOptRegionsIf(IncreaseOccupancy);
+ ExcessRP Excess(ST, RP, MaxSGPRsNoSpill, MaxVGPRsNoSpill);
+ if (Excess && IncreaseOccupancy) {
+ // There is spilling in the region and we were so far trying to increase
+ // occupancy. Stop trying that and focus on reducing spilling.
+ IncreaseOccupancy = false;
+ OptRegions.clear();
} else if (IncreaseOccupancy) {
- // Check whether SGPR pressure prevents us from increasing occupancy.
- if (ClearOptRegionsIf(NumSGPRs > MaxSGPRsIncOcc)) {
- if (DAG.MinOccupancy >= WavesPerEU.first)
- return false;
- continue;
- }
- if ((Excess = ExcessRP(ST, RP, MaxVGPRsIncOcc))) {
- // We can only rematerialize ArchVGPRs at this point.
- unsigned NumArchVGPRsToRemat = Excess.ArchVGPRs + Excess.VGPRs;
- bool NotEnoughArchVGPRs = NumArchVGPRsToRemat > RP.getArchVGPRNum();
- if (ClearOptRegionsIf(Excess.AGPRs || NotEnoughArchVGPRs)) {
- if (DAG.MinOccupancy >= WavesPerEU.first)
- return false;
- continue;
- }
- }
+ Excess = ExcessRP(ST, RP, MaxSGPRsIncOcc, MaxVGPRsIncOcc);
}
if (Excess)
OptRegions.insert({I, Excess});
@@ -1924,23 +1908,34 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
#endif
// When we are reducing spilling, the target is the minimum target number of
- // waves/EU determined by the subtarget.
- TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1 : WavesPerEU.first;
+ // waves/EU determined by the subtarget. In cases where either the
+ // "amdgpu-num-sgpr" or the "amdgpu-num-vgpr" attribute is set on the function,
+ // the current minimum region occupancy may be higher than that minimum.
+ TargetOcc = IncreaseOccupancy ? DAG.MinOccupancy + 1
+ : std::max(DAG.MinOccupancy, WavesPerEU.first);
// Accounts for a reduction in RP in an optimizable region. Returns whether we
// estimate that we have identified enough rematerialization opportunities to
// achieve our goal, and sets Progress to true when this particular reduction
// in pressure was helpful toward that goal.
auto ReduceRPInRegion = [&](auto OptIt, LaneBitmask Mask,
+ const TargetRegisterClass *RC,
bool &Progress) -> bool {
ExcessRP &Excess = OptIt->getSecond();
- // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
- // only when we are just trying to eliminate spilling to memory. At this
- // point we err on the conservative side and do not increase
- // register-to-register spilling for the sake of increasing occupancy.
- Progress |=
- Excess.saveArchVGPRs(SIRegisterInfo::getNumCoveredRegs(Mask),
- /*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
+ unsigned NumRegs = SIRegisterInfo::getNumCoveredRegs(Mask);
+ if (SRI->isSGPRClass(RC)) {
+ Progress |= Excess.saveSGPRs(NumRegs);
+ } else if (SRI->isAGPRClass(RC)) {
+ Progress |= Excess.saveAGPRs(NumRegs);
+ } else {
+ // We allow saved ArchVGPRs to be considered as free spill slots for AGPRs
+ // only when we are just trying to eliminate spilling to memory. At this
+ // point we err on the conservative side and do not increase
+ // register-to-register spilling for the sake of increasing occupancy.
+ Progress |=
+ Excess.saveArchVGPRs(NumRegs,
+ /*UseArchVGPRForAGPRSpill=*/!IncreaseOccupancy);
+ }
if (!Excess)
OptRegions.erase(OptIt->getFirst());
return OptRegions.empty();
@@ -1962,10 +1957,9 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
if (!isTriviallyReMaterializable(DefMI))
continue;
- // We only support rematerializing virtual VGPRs with one definition.
+ // We only support rematerializing virtual registers with one definition.
Register Reg = DefMI.getOperand(0).getReg();
- if (!Reg.isVirtual() || !SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
- !DAG.MRI.hasOneDef(Reg))
+ if (!Reg.isVirtual() || !DAG.MRI.hasOneDef(Reg))
continue;
// We only care to rematerialize the instruction if it has a single
@@ -2003,6 +1997,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
Rematerializations.try_emplace(&DefMI, UseMI).first->second;
bool RematUseful = false;
+ const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
if (auto It = OptRegions.find(I); It != OptRegions.end()) {
// Optimistically consider that moving the instruction out of its
// defining region will reduce RP in the latter; this assumes that
@@ -2010,7 +2005,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// instruction and the end of the region.
REMAT_DEBUG(dbgs() << " Defining region is optimizable\n");
LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
- if (ReduceRPInRegion(It, Mask, RematUseful))
+ if (ReduceRPInRegion(It, Mask, RC, RematUseful))
return true;
}
@@ -2030,7 +2025,7 @@ bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
// instruction's use.
if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
REMAT_DEBUG(dbgs() << " Live-in in region " << LIRegion << '\n');
- if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RematUseful))
+ if (ReduceRPInRegion(It, DAG.LiveIns[LIRegion][Reg], RC, RematUseful))
return true;
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index ca4ab4a2c560f..e20ad34797541 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -444,8 +444,6 @@ class ClusteredLowOccStage : public GCNSchedStage {
/// estimates reducing spilling or increasing occupancy is possible, as few
/// instructions as possible are rematerialized to reduce potential negative
/// effects on function latency.
-///
-/// TODO: We should extend this to work on SGPRs and AGPRs as well.
class PreRARematStage : public GCNSchedStage {
private:
/// Useful information about a rematerializable instruction.
diff --git a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
index f5558964d2707..ba42f9f1d1860 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-scheduler-sink-trivial-remats-attr.mir
@@ -3,6 +3,9 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX90A %s
--- |
+ define void @small_num_sgprs_as_spill() "amdgpu-num-sgpr"="85" {
+ ret void
+ }
define void @small_num_vgprs_as_spill() "amdgpu-num-vgpr"="28" {
ret void
}
@@ -15,13 +18,333 @@
define void @reduce_arch_and_acc_vgrp_spill() "amdgpu-waves-per-eu"="8,8" {
ret void
}
- define void @reduce_spill_archvgpr_above_addressable_limit() "amdgpu-waves-per-eu"="1,10" {
+ define void @reduce_spill_archvgpr_above_addressable_limit() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
ret void
}
- define void @reduce_spill_agpr_above_addressable_limit() "amdgpu-waves-per-eu"="1,10" {
+ define void @reduce_spill_agpr_above_addressable_limit() "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" {
ret void
}
---
+# The user-requested maximum number of SGPRs needs to be taken into account by
+# the scheduler's rematerialization stage. Register usage above that number is
+# treated as spilling.
+name: small_num_sgprs_as_spill
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ ; GFX908-LABEL: name: small_num_sgprs_as_spill
+ ; GFX908: bb.0:
+ ; GFX908-NEXT: successors: %bb.1(0x80000000)
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 1
+ ; GFX908-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 2
+ ; GFX908-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 3
+ ; GFX908-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 4
+ ; GFX908-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 5
+ ; GFX908-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 6
+ ; GFX908-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 7
+ ; GFX908-NEXT: [[S_MOV_B32_7:%[0-9]+]]:sgpr_32 = S_MOV_B32 8
+ ; GFX908-NEXT: [[S_MOV_B32_8:%[0-9]+]]:sgpr_32 = S_MOV_B32 9
+ ; GFX908-NEXT: [[S_MOV_B32_9:%[0-9]+]]:sgpr_32 = S_MOV_B32 10
+ ; GFX908-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sgpr_32 = S_MOV_B32 11
+ ; GFX908-NEXT: [[S_MOV_B32_11:%[0-9]+]]:sgpr_32 = S_MOV_B32 12
+ ; GFX908-NEXT: [[S_MOV_B32_12:%[0-9]+]]:sgpr_32 = S_MOV_B32 13
+ ; GFX908-NEXT: [[S_MOV_B32_13:%[0-9]+]]:sgpr_32 = S_MOV_B32 14
+ ; GFX908-NEXT: [[S_MOV_B32_14:%[0-9]+]]:sgpr_32 = S_MOV_B32 15
+ ; GFX908-NEXT: [[S_MOV_B32_15:%[0-9]+]]:sgpr_32 = S_MOV_B32 16
+ ; GFX908-NEXT: [[S_MOV_B32_16:%[0-9]+]]:sgpr_32 = S_MOV_B32 17
+ ; GFX908-NEXT: [[S_MOV_B32_17:%[0-9]+]]:sgpr_32 = S_MOV_B32 18
+ ; GFX908-NEXT: [[S_MOV_B32_18:%[0-9]+]]:sgpr_32 = S_MOV_B32 19
+ ; GFX908-NEXT: [[S_MOV_B32_19:%[0-9]+]]:sgpr_32 = S_MOV_B32 20
+ ; GFX908-NEXT: [[S_MOV_B32_20:%[0-9]+]]:sgpr_32 = S_MOV_B32 21
+ ; GFX908-NEXT: [[S_MOV_B32_21:%[0-9]+]]:sgpr_32 = S_MOV_B32 22
+ ; GFX908-NEXT: [[S_MOV_B32_22:%[0-9]+]]:sgpr_32 = S_MOV_B32 23
+ ; GFX908-NEXT: [[S_MOV_B32_23:%[0-9]+]]:sgpr_32 = S_MOV_B32 24
+ ; GFX908-NEXT: [[S_MOV_B32_24:%[0-9]+]]:sgpr_32 = S_MOV_B32 25
+ ; GFX908-NEXT: [[S_MOV_B32_25:%[0-9]+]]:sgpr_32 = S_MOV_B32 26
+ ; GFX908-NEXT: [[S_MOV_B32_26:%[0-9]+]]:sgpr_32 = S_MOV_B32 27
+ ; GFX908-NEXT: [[S_MOV_B32_27:%[0-9]+]]:sgpr_32 = S_MOV_B32 28
+ ; GFX908-NEXT: [[S_MOV_B32_28:%[0-9]+]]:sgpr_32 = S_MOV_B32 29
+ ; GFX908-NEXT: [[S_MOV_B32_29:%[0-9]+]]:sgpr_32 = S_MOV_B32 30
+ ; GFX908-NEXT: [[S_MOV_B32_30:%[0-9]+]]:sgpr_32 = S_MOV_B32 31
+ ; GFX908-NEXT: [[S_MOV_B32_31:%[0-9]+]]:sgpr_32 = S_MOV_B32 32
+ ; GFX908-NEXT: [[S_MOV_B32_32:%[0-9]+]]:sgpr_32 = S_MOV_B32 33
+ ; GFX908-NEXT: [[S_MOV_B32_33:%[0-9]+]]:sgpr_32 = S_MOV_B32 34
+ ; GFX908-NEXT: [[S_MOV_B32_34:%[0-9]+]]:sgpr_32 = S_MOV_B32 35
+ ; GFX908-NEXT: [[S_MOV_B32_35:%[0-9]+]]:sgpr_32 = S_MOV_B32 36
+ ; GFX908-NEXT: [[S_MOV_B32_36:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
+ ; GFX908-NEXT: [[S_MOV_B32_37:%[0-9]+]]:sgpr_32 = S_MOV_B32 38
+ ; GFX908-NEXT: [[S_MOV_B32_38:%[0-9]+]]:sgpr_32 = S_MOV_B32 39
+ ; GFX908-NEXT: [[S_MOV_B32_39:%[0-9]+]]:sgpr_32 = S_MOV_B32 40
+ ; GFX908-NEXT: [[S_MOV_B32_40:%[0-9]+]]:sgpr_32 = S_MOV_B32 41
+ ; GFX908-NEXT: [[S_MOV_B32_41:%[0-9]+]]:sgpr_32 = S_MOV_B32 42
+ ; GFX908-NEXT: [[S_MOV_B32_42:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
+ ; GFX908-NEXT: [[S_MOV_B32_43:%[0-9]+]]:sgpr_32 = S_MOV_B32 44
+ ; GFX908-NEXT: [[S_MOV_B32_44:%[0-9]+]]:sgpr_32 = S_MOV_B32 45
+ ; GFX908-NEXT: [[S_MOV_B32_45:%[0-9]+]]:sgpr_32 = S_MOV_B32 46
+ ; GFX908-NEXT: [[S_MOV_B32_46:%[0-9]+]]:sgpr_32 = S_MOV_B32 47
+ ; GFX908-NEXT: [[S_MOV_B32_47:%[0-9]+]]:sgpr_32 = S_MOV_B32 48
+ ; GFX908-NEXT: [[S_MOV_B32_48:%[0-9]+]]:sgpr_32 = S_MOV_B32 49
+ ; GFX908-NEXT: [[S_MOV_B32_49:%[0-9]+]]:sgpr_32 = S_MOV_B32 50
+ ; GFX908-NEXT: [[S_MOV_B32_50:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
+ ; GFX908-NEXT: [[S_MOV_B32_51:%[0-9]+]]:sgpr_32 = S_MOV_B32 52
+ ; GFX908-NEXT: [[S_MOV_B32_52:%[0-9]+]]:sgpr_32 = S_MOV_B32 53
+ ; GFX908-NEXT: [[S_MOV_B32_53:%[0-9]+]]:sgpr_32 = S_MOV_B32 54
+ ; GFX908-NEXT: [[S_MOV_B32_54:%[0-9]+]]:sgpr_32 = S_MOV_B32 55
+ ; GFX908-NEXT: [[S_MOV_B32_55:%[0-9]+]]:sgpr_32 = S_MOV_B32 56
+ ; GFX908-NEXT: [[S_MOV_B32_56:%[0-9]+]]:sgpr_32 = S_MOV_B32 57
+ ; GFX908-NEXT: [[S_MOV_B32_57:%[0-9]+]]:sgpr_32 = S_MOV_B32 58
+ ; GFX908-NEXT: [[S_MOV_B32_58:%[0-9]+]]:sgpr_32 = S_MOV_B32 59
+ ; GFX908-NEXT: [[S_MOV_B32_59:%[0-9]+]]:sgpr_32 = S_MOV_B32 60
+ ; GFX908-NEXT: [[S_MOV_B32_60:%[0-9]+]]:sgpr_32 = S_MOV_B32 61
+ ; GFX908-NEXT: [[S_MOV_B32_61:%[0-9]+]]:sgpr_32 = S_MOV_B32 62
+ ; GFX908-NEXT: [[S_MOV_B32_62:%[0-9]+]]:sgpr_32 = S_MOV_B32 63
+ ; GFX908-NEXT: [[S_MOV_B32_63:%[0-9]+]]:sgpr_32 = S_MOV_B32 64
+ ; GFX908-NEXT: [[S_MOV_B32_64:%[0-9]+]]:sgpr_32 = S_MOV_B32 65
+ ; GFX908-NEXT: [[S_MOV_B32_65:%[0-9]+]]:sgpr_32 = S_MOV_B32 66
+ ; GFX908-NEXT: [[S_MOV_B32_66:%[0-9]+]]:sgpr_32 = S_MOV_B32 67
+ ; GFX908-NEXT: [[S_MOV_B32_67:%[0-9]+]]:sgpr_32 = S_MOV_B32 68
+ ; GFX908-NEXT: [[S_MOV_B32_68:%[0-9]+]]:sgpr_32 = S_MOV_B32 69
+ ; GFX908-NEXT: [[S_MOV_B32_69:%[0-9]+]]:sgpr_32 = S_MOV_B32 70
+ ; GFX908-NEXT: [[S_MOV_B32_70:%[0-9]+]]:sgpr_32 = S_MOV_B32 71
+ ; GFX908-NEXT: [[S_MOV_B32_71:%[0-9]+]]:sgpr_32 = S_MOV_B32 72
+ ; GFX908-NEXT: [[S_MOV_B32_72:%[0-9]+]]:sgpr_32 = S_MOV_B32 73
+ ; GFX908-NEXT: [[S_MOV_B32_73:%[0-9]+]]:sgpr_32 = S_MOV_B32 74
+ ; GFX908-NEXT: [[S_MOV_B32_74:%[0-9]+]]:sgpr_32 = S_MOV_B32 75
+ ; GFX908-NEXT: [[S_MOV_B32_75:%[0-9]+]]:sgpr_32 = S_MOV_B32 76
+ ; GFX908-NEXT: [[S_MOV_B32_76:%[0-9]+]]:sgpr_32 = S_MOV_B32 77
+ ; GFX908-NEXT: [[S_MOV_B32_77:%[0-9]+]]:sgpr_32 = S_MOV_B32 78
+ ; GFX908-NEXT: [[S_MOV_B32_78:%[0-9]+]]:sgpr_32 = S_MOV_B32 79
+ ; GFX908-NEXT: {{ $}}
+ ; GFX908-NEXT: bb.1:
+ ; GFX908-NEXT: [[S_MOV_B32_79:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_79]], implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_4]], implicit [[S_MOV_B32_5]], implicit [[S_MOV_B32_6]], implicit [[S_MOV_B32_7]], implicit [[S_MOV_B32_8]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_9]], implicit [[S_MOV_B32_10]], implicit [[S_MOV_B32_11]], implicit [[S_MOV_B32_12]], implicit [[S_MOV_B32_13]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_14]], implicit [[S_MOV_B32_15]], implicit [[S_MOV_B32_16]], implicit [[S_MOV_B32_17]], implicit [[S_MOV_B32_18]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_19]], implicit [[S_MOV_B32_20]], implicit [[S_MOV_B32_21]], implicit [[S_MOV_B32_22]], implicit [[S_MOV_B32_23]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_24]], implicit [[S_MOV_B32_25]], implicit [[S_MOV_B32_26]], implicit [[S_MOV_B32_27]], implicit [[S_MOV_B32_28]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_29]], implicit [[S_MOV_B32_30]], implicit [[S_MOV_B32_31]], implicit [[S_MOV_B32_32]], implicit [[S_MOV_B32_33]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_34]], implicit [[S_MOV_B32_35]], implicit [[S_MOV_B32_36]], implicit [[S_MOV_B32_37]], implicit [[S_MOV_B32_38]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_39]], implicit [[S_MOV_B32_40]], implicit [[S_MOV_B32_41]], implicit [[S_MOV_B32_42]], implicit [[S_MOV_B32_43]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_44]], implicit [[S_MOV_B32_45]], implicit [[S_MOV_B32_46]], implicit [[S_MOV_B32_47]], implicit [[S_MOV_B32_48]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_49]], implicit [[S_MOV_B32_50]], implicit [[S_MOV_B32_51]], implicit [[S_MOV_B32_52]], implicit [[S_MOV_B32_53]]
+ ; GFX908-NEXT: S_NOP 0, implicit [[S_MOV_B32_54]], implicit [[S_MOV_B32_55]], implicit [[S_MOV_B32_56]], implicit [[S_MOV_B32_57]], implicit [[S_MOV_B32_58]]
+ ; GFX908-NEX...
[truncated]
``````````
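
For reference, the occupancy-target fix mentioned in the description boils down to never lowering the spilling-reduction target below the occupancy the regions already achieve, which can exceed the subtarget's minimum waves/EU when "amdgpu-num-sgpr"/"amdgpu-num-vgpr" constrain register usage. A tiny hedged sketch with made-up numbers (not the scheduler's real types or fields):

```cpp
// Hedged illustration of the adjusted target-occupancy selection: when
// reducing spilling, keep at least the occupancy the regions already reach.
#include <algorithm>
#include <cassert>

unsigned pickTargetOcc(bool IncreaseOccupancy, unsigned MinOccupancy,
                       unsigned MinWavesPerEU) {
  return IncreaseOccupancy ? MinOccupancy + 1
                           : std::max(MinOccupancy, MinWavesPerEU);
}

int main() {
  // Suppose the regions already run at 4 waves/EU while the subtarget minimum
  // is 1. The previous behavior would have targeted 1 when reducing spilling;
  // with the fix the target stays at 4.
  assert(pickTargetOcc(/*IncreaseOccupancy=*/false, /*MinOccupancy=*/4,
                       /*MinWavesPerEU=*/1) == 4);
  return 0;
}
```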
https://github.com/llvm/llvm-project/pull/140036
More information about the llvm-commits mailing list