[llvm] [AMDGPU] Implement Waitcnt Expansion for Profiling (PR #169345)
Pankaj Dwivedi via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 05:42:18 PST 2025
================
@@ -1523,38 +1556,104 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
/// required counters in \p Wait
bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
MachineBasicBlock &Block, MachineBasicBlock::instr_iterator It,
- AMDGPU::Waitcnt Wait) {
+ AMDGPU::Waitcnt Wait, WaitcntBrackets *ScoreBrackets) {
assert(ST);
assert(isNormalMode(MaxCounter));
bool Modified = false;
const DebugLoc &DL = Block.findDebugLoc(It);
+ // Helper to emit expanded waitcnt sequence for profiling.
+ // Emits one waitcnt per value from (Outstanding-1) down to Target, or a
+ // single waitcnt for Target when Outstanding <= Target (nothing to expand).
+ // The EmitWaitcnt callback emits a single waitcnt.
+ auto emitExpandedWaitcnt = [&](unsigned Outstanding, unsigned Target,
+ auto EmitWaitcnt) {
+ if (Outstanding > Target) {
+ for (unsigned i = Outstanding - 1; i >= Target && i != ~0u; --i) {
+ EmitWaitcnt(i);
+ Modified = true;
+ }
+ } else {
+ EmitWaitcnt(Target);
+ Modified = true;
+ }
+ };
+
// Waits for VMcnt, LGKMcnt and/or EXPcnt are encoded together into a
// single instruction while VScnt has its own instruction.
if (Wait.hasWaitExceptStoreCnt()) {
- unsigned Enc = AMDGPU::encodeWaitcnt(IV, Wait);
- [[maybe_unused]] auto SWaitInst =
- BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Enc);
- Modified = true;
+ // If profiling expansion is enabled and we have score brackets,
+ // emit an expanded sequence
+ if (ExpandWaitcntProfiling && ScoreBrackets) {
+ if (Wait.LoadCnt != ~0u) {
----------------
PankajDwivedi-25 wrote:
addressed.
https://github.com/llvm/llvm-project/pull/169345
More information about the llvm-commits
mailing list