[llvm] r347852 - AMDGPU/InsertWaitcnt: Consistently use uint32_t for scores / time points
Nicolai Haehnle via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 29 03:06:22 PST 2018
Author: nha
Date: Thu Nov 29 03:06:21 2018
New Revision: 347852
URL: http://llvm.org/viewvc/llvm-project?rev=347852&view=rev
Log:
AMDGPU/InsertWaitcnt: Consistently use uint32_t for scores / time points
Summary:
The only place that still treated -1 as an indication of an "unknown" score
was a check in determineWait, but no caller ever passes that value, so the check is removed.
Using unsigned scores makes robust wraparound checks easier to write.
Reviewers: msearles, rampitec, scott.linder, kanarayan
Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, llvm-commits, tpr, t-tye, hakzsam
Differential Revision: https://reviews.llvm.org/D54230
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=347852&r1=347851&r2=347852&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Thu Nov 29 03:06:21 2018
@@ -112,9 +112,9 @@ iterator_range<enum_iterator<InstCounter
using RegInterval = std::pair<signed, signed>;
struct {
- int32_t VmcntMax;
- int32_t ExpcntMax;
- int32_t LgkmcntMax;
+ uint32_t VmcntMax;
+ uint32_t ExpcntMax;
+ uint32_t LgkmcntMax;
int32_t NumVGPRsMax;
int32_t NumSGPRsMax;
} HardwareLimits;
@@ -194,7 +194,7 @@ public:
~BlockWaitcntBrackets() = default;
- static int32_t getWaitCountMax(InstCounterType T) {
+ static uint32_t getWaitCountMax(InstCounterType T) {
switch (T) {
case VM_CNT:
return HardwareLimits.VmcntMax;
@@ -208,33 +208,33 @@ public:
return 0;
}
- void setScoreLB(InstCounterType T, int32_t Val) {
+ void setScoreLB(InstCounterType T, uint32_t Val) {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return;
ScoreLBs[T] = Val;
}
- void setScoreUB(InstCounterType T, int32_t Val) {
+ void setScoreUB(InstCounterType T, uint32_t Val) {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return;
ScoreUBs[T] = Val;
if (T == EXP_CNT) {
- int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
- if (ScoreLBs[T] < UB)
+ uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
+ if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
ScoreLBs[T] = UB;
}
}
- int32_t getScoreLB(InstCounterType T) const {
+ uint32_t getScoreLB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
return ScoreLBs[T];
}
- int32_t getScoreUB(InstCounterType T) const {
+ uint32_t getScoreUB(InstCounterType T) const {
assert(T < NUM_INST_CNTS);
if (T >= NUM_INST_CNTS)
return 0;
@@ -251,7 +251,7 @@ public:
return EXP_CNT;
}
- void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
+ void setRegScore(int GprNo, InstCounterType T, uint32_t Val) {
if (GprNo < NUM_ALL_VGPRS) {
if (GprNo > VgprUB) {
VgprUB = GprNo;
@@ -266,7 +266,7 @@ public:
}
}
- int32_t getRegScore(int GprNo, InstCounterType T) {
+ uint32_t getRegScore(int GprNo, InstCounterType T) {
if (GprNo < NUM_ALL_VGPRS) {
return VgprScores[T][GprNo];
}
@@ -291,7 +291,7 @@ public:
void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val);
+ unsigned OpNo, uint32_t Val);
int32_t getMaxVGPR() const { return VgprUB; }
int32_t getMaxSGPR() const { return SgprUB; }
@@ -299,7 +299,7 @@ public:
bool counterOutOfOrder(InstCounterType T) const;
bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
- void determineWait(InstCounterType T, int ScoreToWait,
+ void determineWait(InstCounterType T, uint32_t ScoreToWait,
AMDGPU::Waitcnt &Wait) const;
void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
void applyWaitcnt(InstCounterType T, unsigned Count);
@@ -342,19 +342,19 @@ private:
const GCNSubtarget *ST = nullptr;
bool RevisitLoop = false;
int32_t PostOrder = 0;
- int32_t ScoreLBs[NUM_INST_CNTS] = {0};
- int32_t ScoreUBs[NUM_INST_CNTS] = {0};
+ uint32_t ScoreLBs[NUM_INST_CNTS] = {0};
+ uint32_t ScoreUBs[NUM_INST_CNTS] = {0};
uint32_t PendingEvents = 0;
bool MixedPendingEvents[NUM_INST_CNTS] = {false};
// Remember the last flat memory operation.
- int32_t LastFlat[NUM_INST_CNTS] = {0};
+ uint32_t LastFlat[NUM_INST_CNTS] = {0};
// wait_cnt scores for every vgpr.
// Keep track of the VgprUB and SgprUB to make merge at join efficient.
int32_t VgprUB = 0;
int32_t SgprUB = 0;
- int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+ uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
// Wait cnt scores for every sgpr, only lgkmcnt is relevant.
- int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
+ uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
};
// This is a per-loop-region object that records waitcnt status at the end of
@@ -527,7 +527,7 @@ void BlockWaitcntBrackets::setExpScore(c
const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
const MachineRegisterInfo *MRI,
- unsigned OpNo, int32_t Val) {
+ unsigned OpNo, uint32_t Val) {
RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
LLVM_DEBUG({
const MachineOperand &Opnd = MI->getOperand(OpNo);
@@ -544,7 +544,9 @@ void BlockWaitcntBrackets::updateByEvent
WaitEventType E, MachineInstr &Inst) {
const MachineRegisterInfo &MRIA = *MRI;
InstCounterType T = eventCounter(E);
- int32_t CurrScore = getScoreUB(T) + 1;
+ uint32_t CurrScore = getScoreUB(T) + 1;
+ if (CurrScore == 0)
+ report_fatal_error("InsertWaitcnt score wraparound");
// PendingEvents and ScoreUB need to be update regardless if this event
// changes the score of a register or not.
// Examples including vm_cnt when buffer-store or lgkm_cnt when send-message.
@@ -683,8 +685,8 @@ void BlockWaitcntBrackets::updateByEvent
void BlockWaitcntBrackets::print(raw_ostream &OS) {
OS << '\n';
for (auto T : inst_counter_types()) {
- int LB = getScoreLB(T);
- int UB = getScoreUB(T);
+ uint32_t LB = getScoreLB(T);
+ uint32_t UB = getScoreUB(T);
switch (T) {
case VM_CNT:
@@ -704,10 +706,10 @@ void BlockWaitcntBrackets::print(raw_ost
if (LB < UB) {
// Print vgpr scores.
for (int J = 0; J <= getMaxVGPR(); J++) {
- int RegScore = getRegScore(J, T);
+ uint32_t RegScore = getRegScore(J, T);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
OS << RelScore << ":v" << J << " ";
} else {
@@ -717,10 +719,10 @@ void BlockWaitcntBrackets::print(raw_ost
// Also need to print sgpr scores for lgkm_cnt.
if (T == LGKM_CNT) {
for (int J = 0; J <= getMaxSGPR(); J++) {
- int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
if (RegScore <= LB)
continue;
- int RelScore = RegScore - LB - 1;
+ uint32_t RelScore = RegScore - LB - 1;
OS << RelScore << ":s" << J << " ";
}
}
@@ -740,30 +742,22 @@ bool BlockWaitcntBrackets::simplifyWaitc
bool BlockWaitcntBrackets::simplifyWaitcnt(InstCounterType T,
unsigned &Count) const {
- const int32_t LB = getScoreLB(T);
- const int32_t UB = getScoreUB(T);
- if (Count < (unsigned)UB && UB - (int32_t)Count > LB)
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
+ if (Count < UB && UB - Count > LB)
return true;
Count = ~0u;
return false;
}
-void BlockWaitcntBrackets::determineWait(InstCounterType T, int ScoreToWait,
+void BlockWaitcntBrackets::determineWait(InstCounterType T,
+ uint32_t ScoreToWait,
AMDGPU::Waitcnt &Wait) const {
- if (ScoreToWait == -1) {
- // The score to wait is unknown. This implies that it was not encountered
- // during the path of the CFG walk done during the current traversal but
- // may be seen on a different path. Emit an s_wait counter with a
- // conservative value of 0 for the counter.
- addWait(Wait, T, 0);
- return;
- }
-
// If the score of src_operand falls within the bracket, we need an
// s_waitcnt instruction.
- const int32_t LB = getScoreLB(T);
- const int32_t UB = getScoreUB(T);
+ const uint32_t LB = getScoreLB(T);
+ const uint32_t UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
if ((T == VM_CNT || T == LGKM_CNT) &&
hasPendingFlat() &&
@@ -790,13 +784,13 @@ void BlockWaitcntBrackets::applyWaitcnt(
}
void BlockWaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
- const int32_t UB = getScoreUB(T);
- if (Count >= (unsigned)UB)
+ const uint32_t UB = getScoreUB(T);
+ if (Count >= UB)
return;
if (Count != 0) {
if (counterOutOfOrder(T))
return;
- setScoreLB(T, std::max(getScoreLB(T), UB - (int32_t)Count));
+ setScoreLB(T, std::max(getScoreLB(T), UB - Count));
} else {
setScoreLB(T, UB);
MixedPendingEvents[T] = false;
@@ -1235,8 +1229,8 @@ void SIInsertWaitcnts::updateEventWaitcn
// this merged score bracket is used when adding waitcnts to the Block
void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
- int32_t MaxPending[NUM_INST_CNTS] = {0};
- int32_t MaxFlat[NUM_INST_CNTS] = {0};
+ uint32_t MaxPending[NUM_INST_CNTS] = {0};
+ uint32_t MaxFlat[NUM_INST_CNTS] = {0};
// For single basic block loops, we need to retain the Block's
// score bracket to have accurate Pred info. So, make a copy of Block's
@@ -1264,7 +1258,7 @@ void SIInsertWaitcnts::mergeInputScoreBr
if (!Visited)
continue;
for (auto T : inst_counter_types()) {
- int span =
+ uint32_t span =
PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
MaxPending[T] = std::max(MaxPending[T], span);
span =
@@ -1291,27 +1285,27 @@ void SIInsertWaitcnts::mergeInputScoreBr
// Now merge the gpr_reg_score information
for (auto T : inst_counter_types()) {
- int PredLB = PredScoreBrackets->getScoreLB(T);
- int PredUB = PredScoreBrackets->getScoreUB(T);
+ uint32_t PredLB = PredScoreBrackets->getScoreLB(T);
+ uint32_t PredUB = PredScoreBrackets->getScoreUB(T);
if (PredLB < PredUB) {
- int PredScale = MaxPending[T] - PredUB;
+ uint32_t PredScale = MaxPending[T] - PredUB;
// Merge vgpr scores.
for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
- int PredRegScore = PredScoreBrackets->getRegScore(J, T);
+ uint32_t PredRegScore = PredScoreBrackets->getRegScore(J, T);
if (PredRegScore <= PredLB)
continue;
- int NewRegScore = PredScale + PredRegScore;
+ uint32_t NewRegScore = PredScale + PredRegScore;
ScoreBrackets->setRegScore(
J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
}
// Also need to merge sgpr scores for lgkm_cnt.
if (T == LGKM_CNT) {
for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
- int PredRegScore =
+ uint32_t PredRegScore =
PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
if (PredRegScore <= PredLB)
continue;
- int NewRegScore = PredScale + PredRegScore;
+ uint32_t NewRegScore = PredScale + PredRegScore;
ScoreBrackets->setRegScore(
J + NUM_ALL_VGPRS, LGKM_CNT,
std::max(
More information about the llvm-commits mailing list