[llvm] c8a9031 - [AMDGPU] Small cleanups in wait counter code
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 28 03:05:09 PDT 2022
Author: Stephen Thomas
Date: 2022-10-28T11:05:02+01:00
New Revision: c8a90316fa75f57b3789c9346d482b379f628c09
URL: https://github.com/llvm/llvm-project/commit/c8a90316fa75f57b3789c9346d482b379f628c09
DIFF: https://github.com/llvm/llvm-project/commit/c8a90316fa75f57b3789c9346d482b379f628c09.diff
LOG: [AMDGPU] Small cleanups in wait counter code
A small number of cleanups and refactors intended to enhance readability in
two passes that deal with s_waitcnt instructions.
Differential Revision: https://reviews.llvm.org/D136677
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index e760575cb586..81013db1f003 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1332,6 +1332,12 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
return false;
}
+static bool isStoreCountWaitZero(const MachineInstr &I) {
+ return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+ I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+ !I.getOperand(1).getImm();
+}
+
bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
if (!RunLdsBranchVmemWARHazardFixup)
return false;
@@ -1351,9 +1357,7 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
return false;
auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) {
- return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
- I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
- !I.getOperand(1).getImm());
+ return IsHazardInst(I) || isStoreCountWaitZero(I);
};
auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) {
@@ -1370,9 +1374,7 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
if (InstType == InstType2)
return true;
- return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
- I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
- !I.getOperand(1).getImm();
+ return isStoreCountWaitZero(I);
};
return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) !=
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5b352ac7b870..5e0ae4c2581f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -211,19 +211,20 @@ class WaitcntBrackets {
return ScoreUBs[T];
}
+ unsigned getScoreRange(InstCounterType T) const {
+ return getScoreUB(T) - getScoreLB(T);
+ }
+
// Mapping from event to counter.
InstCounterType eventCounter(WaitEventType E) {
- if (WaitEventMaskForInst[VM_CNT] & (1 << E))
- return VM_CNT;
- if (WaitEventMaskForInst[LGKM_CNT] & (1 << E))
- return LGKM_CNT;
- if (WaitEventMaskForInst[VS_CNT] & (1 << E))
- return VS_CNT;
- assert(WaitEventMaskForInst[EXP_CNT] & (1 << E));
- return EXP_CNT;
+ for (auto T : inst_counter_types()) {
+ if (WaitEventMaskForInst[T] & (1 << E))
+ return T;
+ }
+ llvm_unreachable("event type has no associated counter");
}
- unsigned getRegScore(int GprNo, InstCounterType T) {
+ unsigned getRegScore(int GprNo, InstCounterType T) const {
if (GprNo < NUM_ALL_VGPRS) {
return VgprScores[T][GprNo];
}
@@ -240,21 +241,25 @@ class WaitcntBrackets {
bool counterOutOfOrder(InstCounterType T) const;
void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
void simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
- void determineWait(InstCounterType T, unsigned ScoreToWait,
- AMDGPU::Waitcnt &Wait) const;
+ void determineWait(InstCounterType T, int RegNo, AMDGPU::Waitcnt &Wait) const;
void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
void applyWaitcnt(InstCounterType T, unsigned Count);
void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI, WaitEventType E,
MachineInstr &MI);
- bool hasPending() const { return PendingEvents != 0; }
- bool hasPendingEvent(WaitEventType E) const {
+ unsigned hasPendingEvent() const { return PendingEvents; }
+ unsigned hasPendingEvent(WaitEventType E) const {
return PendingEvents & (1 << E);
}
+ unsigned hasPendingEvent(InstCounterType T) const {
+ unsigned HasPending = PendingEvents & WaitEventMaskForInst[T];
+ assert((HasPending != 0) == (getScoreRange(T) != 0));
+ return HasPending;
+ }
bool hasMixedPendingEvents(InstCounterType T) const {
- unsigned Events = PendingEvents & WaitEventMaskForInst[T];
+ unsigned Events = hasPendingEvent(T);
// Return true if more than one bit is set in Events.
return Events & (Events - 1);
}
@@ -304,11 +309,12 @@ class WaitcntBrackets {
void setScoreUB(InstCounterType T, unsigned Val) {
assert(T < NUM_INST_CNTS);
ScoreUBs[T] = Val;
- if (T == EXP_CNT) {
- unsigned UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
- if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
- ScoreLBs[T] = UB;
- }
+
+ if (T != EXP_CNT)
+ return;
+
+ if (getScoreRange(EXP_CNT) > getWaitCountMax(EXP_CNT))
+ ScoreLBs[EXP_CNT] = ScoreUBs[EXP_CNT] - getWaitCountMax(EXP_CNT);
}
void setRegScore(int GprNo, InstCounterType T, unsigned Val) {
@@ -694,29 +700,30 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
void WaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
for (auto T : inst_counter_types()) {
- unsigned LB = getScoreLB(T);
- unsigned UB = getScoreUB(T);
+ unsigned SR = getScoreRange(T);
switch (T) {
case VM_CNT:
- OS << " VM_CNT(" << UB - LB << "): ";
+ OS << " VM_CNT(" << SR << "): ";
break;
case LGKM_CNT:
- OS << " LGKM_CNT(" << UB - LB << "): ";
+ OS << " LGKM_CNT(" << SR << "): ";
break;
case EXP_CNT:
- OS << " EXP_CNT(" << UB - LB << "): ";
+ OS << " EXP_CNT(" << SR << "): ";
break;
case VS_CNT:
- OS << " VS_CNT(" << UB - LB << "): ";
+ OS << " VS_CNT(" << SR << "): ";
break;
default:
- OS << " UNKNOWN(" << UB - LB << "): ";
+ OS << " UNKNOWN(" << SR << "): ";
break;
}
- if (LB < UB) {
+ if (SR != 0) {
// Print vgpr scores.
+ unsigned LB = getScoreLB(T);
+
for (int J = 0; J <= VgprUB; J++) {
unsigned RegScore = getRegScore(J, T);
if (RegScore <= LB)
@@ -755,18 +762,17 @@ void WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
void WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
unsigned &Count) const {
- const unsigned LB = getScoreLB(T);
- const unsigned UB = getScoreUB(T);
-
// The number of outstanding events for this type, T, can be calculated
// as (UB - LB). If the current Count is greater than or equal to the number
// of outstanding events, then the wait for this counter is redundant.
- if (Count >= UB - LB)
+ if (Count >= getScoreRange(T))
Count = ~0u;
}
-void WaitcntBrackets::determineWait(InstCounterType T, unsigned ScoreToWait,
+void WaitcntBrackets::determineWait(InstCounterType T, int RegNo,
AMDGPU::Waitcnt &Wait) const {
+ unsigned ScoreToWait = getRegScore(RegNo, T);
+
// If the score of src_operand falls within the bracket, we need an
// s_waitcnt instruction.
const unsigned LB = getScoreLB(T);
@@ -1106,8 +1112,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
for (int RegNo = CallAddrOpInterval.first;
RegNo < CallAddrOpInterval.second; ++RegNo)
- ScoreBrackets.determineWait(
- LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+ ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait);
int RtnAddrOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
@@ -1117,8 +1122,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
for (int RegNo = RtnAddrOpInterval.first;
RegNo < RtnAddrOpInterval.second; ++RegNo)
- ScoreBrackets.determineWait(
- LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+ ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait);
}
}
} else {
@@ -1150,11 +1154,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
continue;
unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
// VM_CNT is only relevant to vgpr or LDS.
- ScoreBrackets.determineWait(
- VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(VM_CNT, RegNo, Wait);
if (Memop->isStore()) {
- ScoreBrackets.determineWait(
- EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+ ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait);
}
}
@@ -1176,17 +1178,14 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
if (Op.isUse() || !SIInstrInfo::isVMEM(MI) ||
ScoreBrackets.hasOtherPendingVmemTypes(RegNo,
getVmemType(MI))) {
- ScoreBrackets.determineWait(
- VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+ ScoreBrackets.determineWait(VM_CNT, RegNo, Wait);
ScoreBrackets.clearVgprVmemTypes(RegNo);
}
if (Op.isDef() || ScoreBrackets.hasPendingEvent(EXP_LDS_ACCESS)) {
- ScoreBrackets.determineWait(
- EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+ ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait);
}
}
- ScoreBrackets.determineWait(
- LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+ ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait);
}
}
}
@@ -1205,9 +1204,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// after fixing the scheduler. Also, the Shader Compiler code is
// independent of target.
if (readsVCCZ(MI) && ST->hasReadVCCZBug()) {
- if (ScoreBrackets.getScoreLB(LGKM_CNT) <
- ScoreBrackets.getScoreUB(LGKM_CNT) &&
- ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
+ if (ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
Wait.LgkmCnt = 0;
}
}
@@ -1228,9 +1225,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
Wait.VsCnt = 0;
if (FlushVmCnt) {
- unsigned UB = ScoreBrackets.getScoreUB(VM_CNT);
- unsigned LB = ScoreBrackets.getScoreLB(VM_CNT);
- if (UB - LB != 0)
+ if (ScoreBrackets.hasPendingEvent(VM_CNT))
Wait.VmCnt = 0;
}
@@ -1245,9 +1240,7 @@ bool SIInsertWaitcnts::generateWaitcntBlockEnd(MachineBasicBlock &Block,
MachineInstr *OldWaitcntInstr) {
AMDGPU::Waitcnt Wait;
- unsigned UB = ScoreBrackets.getScoreUB(VM_CNT);
- unsigned LB = ScoreBrackets.getScoreLB(VM_CNT);
- if (UB - LB == 0)
+ if (!ScoreBrackets.hasPendingEvent(VM_CNT))
return false;
Wait.VmCnt = 0;
@@ -1603,8 +1596,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
// 2. Restore the correct value of vccz by writing the current value
// of vcc back to vcc.
if (ST->hasReadVCCZBug() &&
- ScoreBrackets.getScoreLB(LGKM_CNT) <
- ScoreBrackets.getScoreUB(LGKM_CNT) &&
ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
// Writes to vcc while there's an outstanding smem read may get
// clobbered as soon as any read completes.
@@ -1848,7 +1839,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
BI.Dirty = false;
- if (Brackets->hasPending()) {
+ if (Brackets->hasPendingEvent()) {
BlockInfo *MoveBracketsToSucc = nullptr;
for (MachineBasicBlock *Succ : BI.MBB->successors()) {
auto SuccBII = BlockInfos.find(Succ);
More information about the llvm-commits mailing list