[llvm] r347852 - AMDGPU/InsertWaitcnt: Consistently use uint32_t for scores / time points

Nicolai Haehnle via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 29 03:06:22 PST 2018


Author: nha
Date: Thu Nov 29 03:06:21 2018
New Revision: 347852

URL: http://llvm.org/viewvc/llvm-project?rev=347852&view=rev
Log:
AMDGPU/InsertWaitcnt: Consistently use uint32_t for scores / time points

Summary:
There is one obsolete reference (in determineWait) to using -1 as an
indication of an "unknown" score, but that sentinel value isn't actually
used anywhere.

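Using unsigned types makes robust wrapping checks easier, because unsigned
arithmetic wraps around with well-defined behavior (see the new checks in
setScoreUB and updateByEvent).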

Reviewers: msearles, rampitec, scott.linder, kanarayan

Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, llvm-commits, tpr, t-tye, hakzsam

Differential Revision: https://reviews.llvm.org/D54230

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=347852&r1=347851&r2=347852&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Thu Nov 29 03:06:21 2018
@@ -112,9 +112,9 @@ iterator_range<enum_iterator<InstCounter
 using RegInterval = std::pair<signed, signed>;
 
 struct {
-  int32_t VmcntMax;
-  int32_t ExpcntMax;
-  int32_t LgkmcntMax;
+  uint32_t VmcntMax;
+  uint32_t ExpcntMax;
+  uint32_t LgkmcntMax;
   int32_t NumVGPRsMax;
   int32_t NumSGPRsMax;
 } HardwareLimits;
@@ -194,7 +194,7 @@ public:
 
   ~BlockWaitcntBrackets() = default;
 
-  static int32_t getWaitCountMax(InstCounterType T) {
+  static uint32_t getWaitCountMax(InstCounterType T) {
     switch (T) {
     case VM_CNT:
       return HardwareLimits.VmcntMax;
@@ -208,33 +208,33 @@ public:
     return 0;
   }
 
-  void setScoreLB(InstCounterType T, int32_t Val) {
+  void setScoreLB(InstCounterType T, uint32_t Val) {
     assert(T < NUM_INST_CNTS);
     if (T >= NUM_INST_CNTS)
       return;
     ScoreLBs[T] = Val;
   }
 
-  void setScoreUB(InstCounterType T, int32_t Val) {
+  void setScoreUB(InstCounterType T, uint32_t Val) {
     assert(T < NUM_INST_CNTS);
     if (T >= NUM_INST_CNTS)
       return;
     ScoreUBs[T] = Val;
     if (T == EXP_CNT) {
-      int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
-      if (ScoreLBs[T] < UB)
+      uint32_t UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
+      if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
         ScoreLBs[T] = UB;
     }
   }
 
-  int32_t getScoreLB(InstCounterType T) const {
+  uint32_t getScoreLB(InstCounterType T) const {
     assert(T < NUM_INST_CNTS);
     if (T >= NUM_INST_CNTS)
       return 0;
     return ScoreLBs[T];
   }
 
-  int32_t getScoreUB(InstCounterType T) const {
+  uint32_t getScoreUB(InstCounterType T) const {
     assert(T < NUM_INST_CNTS);
     if (T >= NUM_INST_CNTS)
       return 0;
@@ -251,7 +251,7 @@ public:
     return EXP_CNT;
   }
 
-  void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
+  void setRegScore(int GprNo, InstCounterType T, uint32_t Val) {
     if (GprNo < NUM_ALL_VGPRS) {
       if (GprNo > VgprUB) {
         VgprUB = GprNo;
@@ -266,7 +266,7 @@ public:
     }
   }
 
-  int32_t getRegScore(int GprNo, InstCounterType T) {
+  uint32_t getRegScore(int GprNo, InstCounterType T) {
     if (GprNo < NUM_ALL_VGPRS) {
       return VgprScores[T][GprNo];
     }
@@ -291,7 +291,7 @@ public:
 
   void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
                    const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
-                   unsigned OpNo, int32_t Val);
+                   unsigned OpNo, uint32_t Val);
 
   int32_t getMaxVGPR() const { return VgprUB; }
   int32_t getMaxSGPR() const { return SgprUB; }
@@ -299,7 +299,7 @@ public:
   bool counterOutOfOrder(InstCounterType T) const;
   bool simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
   bool simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
-  void determineWait(InstCounterType T, int ScoreToWait,
+  void determineWait(InstCounterType T, uint32_t ScoreToWait,
                      AMDGPU::Waitcnt &Wait) const;
   void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
   void applyWaitcnt(InstCounterType T, unsigned Count);
@@ -342,19 +342,19 @@ private:
   const GCNSubtarget *ST = nullptr;
   bool RevisitLoop = false;
   int32_t PostOrder = 0;
-  int32_t ScoreLBs[NUM_INST_CNTS] = {0};
-  int32_t ScoreUBs[NUM_INST_CNTS] = {0};
+  uint32_t ScoreLBs[NUM_INST_CNTS] = {0};
+  uint32_t ScoreUBs[NUM_INST_CNTS] = {0};
   uint32_t PendingEvents = 0;
   bool MixedPendingEvents[NUM_INST_CNTS] = {false};
   // Remember the last flat memory operation.
-  int32_t LastFlat[NUM_INST_CNTS] = {0};
+  uint32_t LastFlat[NUM_INST_CNTS] = {0};
   // wait_cnt scores for every vgpr.
   // Keep track of the VgprUB and SgprUB to make merge at join efficient.
   int32_t VgprUB = 0;
   int32_t SgprUB = 0;
-  int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+  uint32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
   // Wait cnt scores for every sgpr, only lgkmcnt is relevant.
-  int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
+  uint32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
 };
 
 // This is a per-loop-region object that records waitcnt status at the end of
@@ -527,7 +527,7 @@ void BlockWaitcntBrackets::setExpScore(c
                                        const SIInstrInfo *TII,
                                        const SIRegisterInfo *TRI,
                                        const MachineRegisterInfo *MRI,
-                                       unsigned OpNo, int32_t Val) {
+                                       unsigned OpNo, uint32_t Val) {
   RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
   LLVM_DEBUG({
     const MachineOperand &Opnd = MI->getOperand(OpNo);
@@ -544,7 +544,9 @@ void BlockWaitcntBrackets::updateByEvent
                                          WaitEventType E, MachineInstr &Inst) {
   const MachineRegisterInfo &MRIA = *MRI;
   InstCounterType T = eventCounter(E);
-  int32_t CurrScore = getScoreUB(T) + 1;
+  uint32_t CurrScore = getScoreUB(T) + 1;
+  if (CurrScore == 0)
+    report_fatal_error("InsertWaitcnt score wraparound");
   // PendingEvents and ScoreUB need to be update regardless if this event
   // changes the score of a register or not.
   // Examples including vm_cnt when buffer-store or lgkm_cnt when send-message.
@@ -683,8 +685,8 @@ void BlockWaitcntBrackets::updateByEvent
 void BlockWaitcntBrackets::print(raw_ostream &OS) {
   OS << '\n';
   for (auto T : inst_counter_types()) {
-    int LB = getScoreLB(T);
-    int UB = getScoreUB(T);
+    uint32_t LB = getScoreLB(T);
+    uint32_t UB = getScoreUB(T);
 
     switch (T) {
     case VM_CNT:
@@ -704,10 +706,10 @@ void BlockWaitcntBrackets::print(raw_ost
     if (LB < UB) {
       // Print vgpr scores.
       for (int J = 0; J <= getMaxVGPR(); J++) {
-        int RegScore = getRegScore(J, T);
+        uint32_t RegScore = getRegScore(J, T);
         if (RegScore <= LB)
           continue;
-        int RelScore = RegScore - LB - 1;
+        uint32_t RelScore = RegScore - LB - 1;
         if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
           OS << RelScore << ":v" << J << " ";
         } else {
@@ -717,10 +719,10 @@ void BlockWaitcntBrackets::print(raw_ost
       // Also need to print sgpr scores for lgkm_cnt.
       if (T == LGKM_CNT) {
         for (int J = 0; J <= getMaxSGPR(); J++) {
-          int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+          uint32_t RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
           if (RegScore <= LB)
             continue;
-          int RelScore = RegScore - LB - 1;
+          uint32_t RelScore = RegScore - LB - 1;
           OS << RelScore << ":s" << J << " ";
         }
       }
@@ -740,30 +742,22 @@ bool BlockWaitcntBrackets::simplifyWaitc
 
 bool BlockWaitcntBrackets::simplifyWaitcnt(InstCounterType T,
                                            unsigned &Count) const {
-  const int32_t LB = getScoreLB(T);
-  const int32_t UB = getScoreUB(T);
-  if (Count < (unsigned)UB && UB - (int32_t)Count > LB)
+  const uint32_t LB = getScoreLB(T);
+  const uint32_t UB = getScoreUB(T);
+  if (Count < UB && UB - Count > LB)
     return true;
 
   Count = ~0u;
   return false;
 }
 
-void BlockWaitcntBrackets::determineWait(InstCounterType T, int ScoreToWait,
+void BlockWaitcntBrackets::determineWait(InstCounterType T,
+                                         uint32_t ScoreToWait,
                                          AMDGPU::Waitcnt &Wait) const {
-  if (ScoreToWait == -1) {
-    // The score to wait is unknown. This implies that it was not encountered
-    // during the path of the CFG walk done during the current traversal but
-    // may be seen on a different path. Emit an s_wait counter with a
-    // conservative value of 0 for the counter.
-    addWait(Wait, T, 0);
-    return;
-  }
-
   // If the score of src_operand falls within the bracket, we need an
   // s_waitcnt instruction.
-  const int32_t LB = getScoreLB(T);
-  const int32_t UB = getScoreUB(T);
+  const uint32_t LB = getScoreLB(T);
+  const uint32_t UB = getScoreUB(T);
   if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
     if ((T == VM_CNT || T == LGKM_CNT) &&
         hasPendingFlat() &&
@@ -790,13 +784,13 @@ void BlockWaitcntBrackets::applyWaitcnt(
 }
 
 void BlockWaitcntBrackets::applyWaitcnt(InstCounterType T, unsigned Count) {
-  const int32_t UB = getScoreUB(T);
-  if (Count >= (unsigned)UB)
+  const uint32_t UB = getScoreUB(T);
+  if (Count >= UB)
     return;
   if (Count != 0) {
     if (counterOutOfOrder(T))
       return;
-    setScoreLB(T, std::max(getScoreLB(T), UB - (int32_t)Count));
+    setScoreLB(T, std::max(getScoreLB(T), UB - Count));
   } else {
     setScoreLB(T, UB);
     MixedPendingEvents[T] = false;
@@ -1235,8 +1229,8 @@ void SIInsertWaitcnts::updateEventWaitcn
 // this merged score bracket is used when adding waitcnts to the Block
 void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
   BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
-  int32_t MaxPending[NUM_INST_CNTS] = {0};
-  int32_t MaxFlat[NUM_INST_CNTS] = {0};
+  uint32_t MaxPending[NUM_INST_CNTS] = {0};
+  uint32_t MaxFlat[NUM_INST_CNTS] = {0};
 
   // For single basic block loops, we need to retain the Block's
   // score bracket to have accurate Pred info. So, make a copy of Block's
@@ -1264,7 +1258,7 @@ void SIInsertWaitcnts::mergeInputScoreBr
     if (!Visited)
       continue;
     for (auto T : inst_counter_types()) {
-      int span =
+      uint32_t span =
           PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
       MaxPending[T] = std::max(MaxPending[T], span);
       span =
@@ -1291,27 +1285,27 @@ void SIInsertWaitcnts::mergeInputScoreBr
 
     // Now merge the gpr_reg_score information
     for (auto T : inst_counter_types()) {
-      int PredLB = PredScoreBrackets->getScoreLB(T);
-      int PredUB = PredScoreBrackets->getScoreUB(T);
+      uint32_t PredLB = PredScoreBrackets->getScoreLB(T);
+      uint32_t PredUB = PredScoreBrackets->getScoreUB(T);
       if (PredLB < PredUB) {
-        int PredScale = MaxPending[T] - PredUB;
+        uint32_t PredScale = MaxPending[T] - PredUB;
         // Merge vgpr scores.
         for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
-          int PredRegScore = PredScoreBrackets->getRegScore(J, T);
+          uint32_t PredRegScore = PredScoreBrackets->getRegScore(J, T);
           if (PredRegScore <= PredLB)
             continue;
-          int NewRegScore = PredScale + PredRegScore;
+          uint32_t NewRegScore = PredScale + PredRegScore;
           ScoreBrackets->setRegScore(
               J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
         }
         // Also need to merge sgpr scores for lgkm_cnt.
         if (T == LGKM_CNT) {
           for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
-            int PredRegScore =
+            uint32_t PredRegScore =
                 PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
             if (PredRegScore <= PredLB)
               continue;
-            int NewRegScore = PredScale + PredRegScore;
+            uint32_t NewRegScore = PredScale + PredRegScore;
             ScoreBrackets->setRegScore(
                 J + NUM_ALL_VGPRS, LGKM_CNT,
                 std::max(




More information about the llvm-commits mailing list