[clang] [llvm] Revert "[AMDGPU][SIInsertWaitcnt] Implement Waitcnt Expansion for Profiling" (PR #175904)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 14 00:47:28 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Pankaj Dwivedi (PankajDwivedi-25)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->169345 to address review feedback.
---
Patch is 68.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/175904.diff
7 Files Affected:
- (modified) clang/include/clang/Basic/CodeGenOptions.def (-4)
- (modified) clang/include/clang/Options/Options.td (-7)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (-2)
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+92-203)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (-19)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (-20)
- (removed) llvm/test/CodeGen/AMDGPU/expand-waitcnt-profiling.ll (-944)
``````````diff
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index baf8b093c10e6..6cdbffc456193 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -466,10 +466,6 @@ CODEGENOPT(AAPCSBitfieldWidth, 1, 1, Benign)
/// propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU Only)
CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1, Benign)
-/// Enable expanded waitcnt for profiling (AMDGPU Only)
-/// Expands s_waitcnt instructions to help PC-sampling profilers identify stalls.
-CODEGENOPT(AMDGPUExpandWaitcntProfiling, 1, 0, Benign)
-
// Whether to emit Swift Async function extended frame information: auto,
// never, always.
ENUM_CODEGENOPT(SwiftAsyncFramePointer, SwiftAsyncFramePointerKind, 2,
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 2f57a5b13b917..5ad0ff2a773c8 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -5585,13 +5585,6 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee",
"This option changes the ABI. (AMDGPU only)">,
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;
-defm amdgpu_expand_waitcnt_profiling : BoolMOption<"amdgpu-expand-waitcnt-profiling",
- CodeGenOpts<"AMDGPUExpandWaitcntProfiling">, DefaultFalse,
- PosFlag<SetTrue, [], [ClangOption, CC1Option], "Expand s_waitcnt instructions to help "
- "PC-sampling profilers identify memory stalls. Instead of a single waitcnt(target), "
- "emits waitcnt(N-1), waitcnt(N-2), ..., waitcnt(target). (AMDGPU only)">,
- NegFlag<SetFalse, [], [ClangOption]>>;
-
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4bc9557b26b52..0ab6c753b8bad 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -443,8 +443,6 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
setFunctionDeclAttributes(FD, F, M);
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
- if (getABIInfo().getCodeGenOpts().AMDGPUExpandWaitcntProfiling)
- F->addFnAttr("amdgpu-expand-waitcnt-profiling");
}
unsigned AMDGPUTargetCodeGenInfo::getDeviceKernelCallingConv() const {
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b3e834b66ad45..bf842e0ecb4af 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -105,35 +105,6 @@ auto inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS) {
return enum_seq(LOAD_CNT, MaxCounter);
}
-// Get the maximum wait count value for a given counter type.
-static unsigned getWaitCountMax(const AMDGPU::HardwareLimits &Limits,
- InstCounterType T) {
- switch (T) {
- case LOAD_CNT:
- return Limits.LoadcntMax;
- case DS_CNT:
- return Limits.DscntMax;
- case EXP_CNT:
- return Limits.ExpcntMax;
- case STORE_CNT:
- return Limits.StorecntMax;
- case SAMPLE_CNT:
- return Limits.SamplecntMax;
- case BVH_CNT:
- return Limits.BvhcntMax;
- case KM_CNT:
- return Limits.KmcntMax;
- case X_CNT:
- return Limits.XcntMax;
- case VA_VDST:
- return Limits.VaVdstMax;
- case VM_VSRC:
- return Limits.VmVsrcMax;
- default:
- return 0;
- }
-}
-
/// Integer IDs used to track vector memory locations we may have to wait on.
/// Encoded as u16 chunks:
///
@@ -169,6 +140,19 @@ static constexpr VMEMID toVMEMID(MCRegUnit RU) {
return static_cast<unsigned>(RU);
}
+struct HardwareLimits {
+ unsigned LoadcntMax; // Corresponds to VMcnt prior to gfx12.
+ unsigned ExpcntMax;
+ unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
+ unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
+ unsigned SamplecntMax; // gfx12+ only.
+ unsigned BvhcntMax; // gfx12+ only.
+ unsigned KmcntMax; // gfx12+ only.
+ unsigned XcntMax; // gfx1250.
+ unsigned VaVdstMax; // gfx12+ expert mode only.
+ unsigned VmVsrcMax; // gfx12+ expert mode only.
+};
+
#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
DECL(VMEM_ACCESS) /* vmem read & write (pre-gfx10), vmem read (gfx10+) */ \
DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
@@ -330,27 +314,19 @@ class WaitcntGenerator {
AMDGPU::IsaVersion IV;
InstCounterType MaxCounter;
bool OptNone;
- bool ExpandWaitcntProfiling = false;
- const AMDGPU::HardwareLimits *Limits = nullptr;
public:
WaitcntGenerator() = default;
- WaitcntGenerator(const MachineFunction &MF, InstCounterType MaxCounter,
- const AMDGPU::HardwareLimits *Limits)
+ WaitcntGenerator(const MachineFunction &MF, InstCounterType MaxCounter)
: ST(&MF.getSubtarget<GCNSubtarget>()), TII(ST->getInstrInfo()),
IV(AMDGPU::getIsaVersion(ST->getCPU())), MaxCounter(MaxCounter),
OptNone(MF.getFunction().hasOptNone() ||
- MF.getTarget().getOptLevel() == CodeGenOptLevel::None),
- ExpandWaitcntProfiling(
- MF.getFunction().hasFnAttribute("amdgpu-expand-waitcnt-profiling")),
- Limits(Limits) {}
+ MF.getTarget().getOptLevel() == CodeGenOptLevel::None) {}
// Return true if the current function should be compiled with no
// optimization.
bool isOptNone() const { return OptNone; }
- const AMDGPU::HardwareLimits &getLimits() const { return *Limits; }
-
// Edits an existing sequence of wait count instructions according
// to an incoming Waitcnt value, which is itself updated to reflect
// any new wait count instructions which may need to be generated by
@@ -372,11 +348,9 @@ class WaitcntGenerator {
// Generates new wait count instructions according to the value of
// Wait, returning true if any new instructions were created.
- // If ScoreBrackets is provided, it can be used for profiling expansion.
virtual bool createNewWaitcnt(MachineBasicBlock &Block,
MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait,
- WaitcntBrackets *ScoreBrackets = nullptr) = 0;
+ AMDGPU::Waitcnt Wait) = 0;
// Returns an array of bit masks which can be used to map values in
// WaitEventType to corresponding counter values in InstCounterType.
@@ -401,10 +375,7 @@ class WaitcntGenerator {
class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
public:
- WaitcntGeneratorPreGFX12() = default;
- WaitcntGeneratorPreGFX12(const MachineFunction &MF,
- const AMDGPU::HardwareLimits *Limits)
- : WaitcntGenerator(MF, NUM_NORMAL_INST_CNTS, Limits) {}
+ using WaitcntGenerator::WaitcntGenerator;
bool
applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
@@ -413,8 +384,7 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
bool createNewWaitcnt(MachineBasicBlock &Block,
MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait,
- WaitcntBrackets *ScoreBrackets = nullptr) override;
+ AMDGPU::Waitcnt Wait) override;
const unsigned *getWaitEventMask() const override {
assert(ST);
@@ -446,10 +416,8 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
public:
WaitcntGeneratorGFX12Plus() = default;
WaitcntGeneratorGFX12Plus(const MachineFunction &MF,
- InstCounterType MaxCounter,
- const AMDGPU::HardwareLimits *Limits,
- bool IsExpertMode)
- : WaitcntGenerator(MF, MaxCounter, Limits), IsExpertMode(IsExpertMode) {}
+ InstCounterType MaxCounter, bool IsExpertMode)
+ : WaitcntGenerator(MF, MaxCounter), IsExpertMode(IsExpertMode) {}
bool
applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
@@ -458,8 +426,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
bool createNewWaitcnt(MachineBasicBlock &Block,
MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait,
- WaitcntBrackets *ScoreBrackets = nullptr) override;
+ AMDGPU::Waitcnt Wait) override;
const unsigned *getWaitEventMask() const override {
assert(ST);
@@ -533,7 +500,7 @@ class SIInsertWaitcnts {
// message.
DenseSet<MachineInstr *> ReleaseVGPRInsts;
- AMDGPU::HardwareLimits Limits;
+ HardwareLimits Limits;
public:
SIInsertWaitcnts(MachineLoopInfo *MLI, MachinePostDominatorTree *PDT,
@@ -544,7 +511,33 @@ class SIInsertWaitcnts {
(void)ForceVMCounter;
}
- const AMDGPU::HardwareLimits &getLimits() const { return Limits; }
+ unsigned getWaitCountMax(InstCounterType T) const {
+ switch (T) {
+ case LOAD_CNT:
+ return Limits.LoadcntMax;
+ case DS_CNT:
+ return Limits.DscntMax;
+ case EXP_CNT:
+ return Limits.ExpcntMax;
+ case STORE_CNT:
+ return Limits.StorecntMax;
+ case SAMPLE_CNT:
+ return Limits.SamplecntMax;
+ case BVH_CNT:
+ return Limits.BvhcntMax;
+ case KM_CNT:
+ return Limits.KmcntMax;
+ case X_CNT:
+ return Limits.XcntMax;
+ case VA_VDST:
+ return Limits.VaVdstMax;
+ case VM_VSRC:
+ return Limits.VmVsrcMax;
+ default:
+ break;
+ }
+ return 0;
+ }
PreheaderFlushFlags getPreheaderFlushFlags(MachineLoop *ML,
const WaitcntBrackets &Brackets);
@@ -776,7 +769,7 @@ class WaitcntBrackets {
unsigned getPendingGDSWait() const {
return std::min(getScoreUB(DS_CNT) - LastGDS,
- getWaitCountMax(Context->getLimits(), DS_CNT) - 1);
+ Context->getWaitCountMax(DS_CNT) - 1);
}
void setPendingGDS() { LastGDS = ScoreUBs[DS_CNT]; }
@@ -803,8 +796,8 @@ class WaitcntBrackets {
}
void setStateOnFunctionEntryOrReturn() {
- setScoreUB(STORE_CNT, getScoreUB(STORE_CNT) +
- getWaitCountMax(Context->getLimits(), STORE_CNT));
+ setScoreUB(STORE_CNT,
+ getScoreUB(STORE_CNT) + Context->getWaitCountMax(STORE_CNT));
PendingEvents |= Context->WaitEventMaskForInst[STORE_CNT];
}
@@ -860,9 +853,8 @@ class WaitcntBrackets {
if (T != EXP_CNT)
return;
- if (getScoreRange(EXP_CNT) > getWaitCountMax(Context->getLimits(), EXP_CNT))
- ScoreLBs[EXP_CNT] =
- ScoreUBs[EXP_CNT] - getWaitCountMax(Context->getLimits(), EXP_CNT);
+ if (getScoreRange(EXP_CNT) > Context->getWaitCountMax(EXP_CNT))
+ ScoreLBs[EXP_CNT] = ScoreUBs[EXP_CNT] - Context->getWaitCountMax(EXP_CNT);
}
void setRegScore(MCPhysReg Reg, InstCounterType T, unsigned Val) {
@@ -1365,8 +1357,8 @@ void WaitcntBrackets::determineWaitForScore(InstCounterType T,
} else {
// If a counter has been maxed out avoid overflow by waiting for
// MAX(CounterType) - 1 instead.
- unsigned NeededWait = std::min(
- UB - ScoreToWait, getWaitCountMax(Context->getLimits(), T) - 1);
+ unsigned NeededWait =
+ std::min(UB - ScoreToWait, Context->getWaitCountMax(T) - 1);
addWait(Wait, T, NeededWait);
}
}
@@ -1683,109 +1675,38 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
/// required counters in \p Wait
bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
MachineBasicBlock &Block, MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait, WaitcntBrackets *ScoreBrackets) {
+ AMDGPU::Waitcnt Wait) {
assert(ST);
assert(isNormalMode(MaxCounter));
bool Modified = false;
const DebugLoc &DL = Block.findDebugLoc(It);
- // Helper to emit expanded waitcnt sequence for profiling.
- // Emits waitcnts from (Outstanding-1) down to Target, or just Target if
- // nothing to expand. The EmitWaitcnt callback emits a single waitcnt.
- auto EmitExpandedWaitcnt = [&](unsigned Outstanding, unsigned Target,
- auto EmitWaitcnt) {
- if (Outstanding > Target) {
- for (unsigned i = Outstanding - 1; i >= Target && i != ~0u; --i) {
- EmitWaitcnt(i);
- Modified = true;
- }
- } else {
- EmitWaitcnt(Target);
- Modified = true;
- }
- };
-
// Waits for VMcnt, LKGMcnt and/or EXPcnt are encoded together into a
// single instruction while VScnt has its own instruction.
if (Wait.hasWaitExceptStoreCnt()) {
- // If profiling expansion is enabled and we have score brackets,
- // emit an expanded sequence
- if (ExpandWaitcntProfiling && ScoreBrackets) {
- // Check if any of the counters to be waited on are out-of-order.
- // If so, fall back to normal (non-expanded) behavior since expansion
- // would provide misleading profiling information.
- bool AnyOutOfOrder = false;
- for (auto CT : {LOAD_CNT, DS_CNT, EXP_CNT}) {
- unsigned &WaitCnt = getCounterRef(Wait, CT);
- if (WaitCnt != ~0u && ScoreBrackets->counterOutOfOrder(CT)) {
- AnyOutOfOrder = true;
- break;
- }
- }
-
- if (AnyOutOfOrder) {
- // Fall back to non-expanded wait
- unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+ unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
+ [[maybe_unused]] auto SWaitInst =
BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
- Modified = true;
- } else {
- // All counters are in-order, safe to expand
- for (auto CT : {LOAD_CNT, DS_CNT, EXP_CNT}) {
- unsigned &WaitCnt = getCounterRef(Wait, CT);
- if (WaitCnt == ~0u)
- continue;
-
- unsigned Outstanding = std::min(ScoreBrackets->getScoreUB(CT) -
- ScoreBrackets->getScoreLB(CT),
- getWaitCountMax(getLimits(), CT) - 1);
- EmitExpandedWaitcnt(Outstanding, WaitCnt, [&](unsigned Count) {
- AMDGPU::Waitcnt W;
- getCounterRef(W, CT) = Count;
- BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT))
- .addImm(AMDGPU::encodeWaitcnt(IV, W));
- });
- }
- }
- } else {
- // Normal behavior: emit single combined waitcnt
- unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
- [[maybe_unused]] auto SWaitInst =
- BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
- Modified = true;
+ Modified = true;
- LLVM_DEBUG(dbgs() << "PreGFX12::createNewWaitcnt\n";
- if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It;
- dbgs() << "New Instr: " << *SWaitInst << '\n');
- }
+ LLVM_DEBUG(dbgs() << "PreGFX12::createNewWaitcnt\n";
+ if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It;
+ dbgs() << "New Instr: " << *SWaitInst << '\n');
}
if (Wait.hasWaitStoreCnt()) {
assert(ST->hasVscnt());
- if (ExpandWaitcntProfiling && ScoreBrackets && Wait.StoreCnt != ~0u &&
- !ScoreBrackets->counterOutOfOrder(STORE_CNT)) {
- // Only expand if counter is not out-of-order
- unsigned Outstanding =
- std::min(ScoreBrackets->getScoreUB(STORE_CNT) -
- ScoreBrackets->getScoreLB(STORE_CNT),
- getWaitCountMax(getLimits(), STORE_CNT) - 1);
- EmitExpandedWaitcnt(Outstanding, Wait.StoreCnt, [&](unsigned Count) {
+ [[maybe_unused]] auto SWaitInst =
BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(Count);
- });
- } else {
- [[maybe_unused]] auto SWaitInst =
- BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(Wait.StoreCnt);
- Modified = true;
+ .addImm(Wait.StoreCnt);
+ Modified = true;
- LLVM_DEBUG(dbgs() << "PreGFX12::createNewWaitcnt\n";
- if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It;
- dbgs() << "New Instr: " << *SWaitInst << '\n');
- }
+ LLVM_DEBUG(dbgs() << "PreGFX12::createNewWaitcnt\n";
+ if (It != Block.instr_end()) dbgs() << "Old Instr: " << *It;
+ dbgs() << "New Instr: " << *SWaitInst << '\n');
}
return Modified;
@@ -2082,55 +2003,13 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
/// Generate S_WAIT_*CNT instructions for any required counters in \p Wait
bool WaitcntGeneratorGFX12Plus::createNewWaitcnt(
MachineBasicBlock &Block, MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait, WaitcntBrackets *ScoreBrackets) {
+ AMDGPU::Waitcnt Wait) {
assert(ST);
assert(!isNormalMode(MaxCounter));
bool Modified = false;
const DebugLoc &DL = Block.findDebugLoc(It);
- // Helper to emit expanded waitcnt sequence for profiling.
- auto EmitExpandedWaitcnt = [&](unsigned Outstanding, unsigned Target,
- auto EmitWaitcnt) {
- if (Outstanding > Target) {
- for (unsigned i = Outstanding - 1; i >= Target && i != ~0u; --i) {
- EmitWaitcnt(i);
- Modified = true;
- }
- } else {
- EmitWaitcnt(Target);
- Modified = true;
- }
- };
-
- // For GFX12+, we use separate wait instructions, which makes expansion
- // simpler
- if (ExpandWaitcntProfiling && ScoreBrackets) {
- for (auto CT : inst_counter_types(NUM_EXTENDED_INST_CNTS)) {
- unsigned Count = getWait(Wait, CT);
- if (Count == ~0u)
- continue;
-
- // Skip expansion for out-of-order counters - emit normal wait instead
- if (ScoreBrackets->counterOutOfOrder(CT)) {
- BuildMI(Block, It, DL, TII->get(instrsForExtendedCounterTypes[CT]))
- .addImm(Count);
- Modified = true;
- continue;
- }
-
- unsigned Outstanding = std::min(ScoreBrackets->getScoreUB(CT) -
- ScoreBrackets->getScoreLB(CT),
- getWaitCountMax(getLimits(), CT) - 1);
- EmitExpandedWaitcnt(Outstanding, Count, [&](unsigned Val) {
- BuildMI(Block, It, DL, TII->get(instrsForExtendedCounterTypes[CT]))
- .addImm(Val);
- });
- }
- return Modified;
- }
-
- // Normal behavior (no expansion)
// Check for opportunities to use combined wait instructions.
if (Wait.DsCnt != ~0u) {
MachineInstr *SWaitInst = nullptr;
@@ -2529,7 +2408,9 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
Modified =
WCG->applyPreexistingWaitcnt(ScoreBrackets, *OldWaitcntInstr, Wait, It);
- AMDGPU::Waitcnt WaitForScore = Wait;
+ // Any counts that could have been applied to any existing waitcnt
+ // instructions will have been done so, now deal with any remaining.
+ ScoreBrackets.applyWaitcnt(Wait);
// ExpCnt can be merged into VINTERP.
if (Wait.ExpCnt != ~0u && It != Block.instr_end() &&
@@ -2546,13 +2427,9 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
<< "Update Instr: " << *It);
}
- if (WCG->createNewWaitcnt(Block, It, Wait, &ScoreBrackets))
+ if (WCG->createNewWaitcnt(Block, It, Wait))
Modified = true;
- // Any counts that could have been applied to any existing waitcnt
- // instructions will have been done so, now deal with any remaining.
- ScoreBrackets.applyWaitcnt(WaitForScore);
-
return Modified;
}
@@ -3259,9 +3136,6 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(ST->getCPU());
- // Initialize hardware limits first, as they're needed by the generators.
- Limits = AMDGPU::HardwareLimits(IV, ST->hasExtendedWaitCounts());
-
if (ST->hasExtendedWaitCounts()) {
IsExpertMode = ST->hasExpertSchedulingMode() &&
(ExpertSchedulingModeFlag.getNumOccurrences()
@@ -3270,12 +3144,11 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
.getFnAttrib...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/175904
More information about the cfe-commits
mailing list