[llvm] c8a9031 - [AMDGPU] Small cleanups in wait counter code

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 28 03:05:09 PDT 2022


Author: Stephen Thomas
Date: 2022-10-28T11:05:02+01:00
New Revision: c8a90316fa75f57b3789c9346d482b379f628c09

URL: https://github.com/llvm/llvm-project/commit/c8a90316fa75f57b3789c9346d482b379f628c09
DIFF: https://github.com/llvm/llvm-project/commit/c8a90316fa75f57b3789c9346d482b379f628c09.diff

LOG: [AMDGPU] Small cleanups in wait counter code

A small number of cleanups and refactors intended to enhance readability in
two passes that deal with s_waitcnt instructions.

Differential Revision: https://reviews.llvm.org/D136677

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index e760575cb586..81013db1f003 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1332,6 +1332,12 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
   return false;
 }
 
+static bool isStoreCountWaitZero(const MachineInstr &I) {
+  return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
+         I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
+         !I.getOperand(1).getImm();
+}
+
 bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
   if (!RunLdsBranchVmemWARHazardFixup)
     return false;
@@ -1351,9 +1357,7 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
     return false;
 
   auto IsExpiredFn = [&IsHazardInst](const MachineInstr &I, int) {
-    return IsHazardInst(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
-                               I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
-                               !I.getOperand(1).getImm());
+    return IsHazardInst(I) || isStoreCountWaitZero(I);
   };
 
   auto IsHazardFn = [InstType, &IsHazardInst](const MachineInstr &I) {
@@ -1370,9 +1374,7 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
       if (InstType == InstType2)
         return true;
 
-      return I.getOpcode() == AMDGPU::S_WAITCNT_VSCNT &&
-             I.getOperand(0).getReg() == AMDGPU::SGPR_NULL &&
-             !I.getOperand(1).getImm();
+      return isStoreCountWaitZero(I);
     };
 
     return ::getWaitStatesSince(IsHazardFn, &I, IsExpiredFn) !=

diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 5b352ac7b870..5e0ae4c2581f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -211,19 +211,20 @@ class WaitcntBrackets {
     return ScoreUBs[T];
   }
 
+  unsigned getScoreRange(InstCounterType T) const {
+    return getScoreUB(T) - getScoreLB(T);
+  }
+
   // Mapping from event to counter.
   InstCounterType eventCounter(WaitEventType E) {
-    if (WaitEventMaskForInst[VM_CNT] & (1 << E))
-      return VM_CNT;
-    if (WaitEventMaskForInst[LGKM_CNT] & (1 << E))
-      return LGKM_CNT;
-    if (WaitEventMaskForInst[VS_CNT] & (1 << E))
-      return VS_CNT;
-    assert(WaitEventMaskForInst[EXP_CNT] & (1 << E));
-    return EXP_CNT;
+    for (auto T : inst_counter_types()) {
+      if (WaitEventMaskForInst[T] & (1 << E))
+        return T;
+    }
+    llvm_unreachable("event type has no associated counter");
   }
 
-  unsigned getRegScore(int GprNo, InstCounterType T) {
+  unsigned getRegScore(int GprNo, InstCounterType T) const {
     if (GprNo < NUM_ALL_VGPRS) {
       return VgprScores[T][GprNo];
     }
@@ -240,21 +241,25 @@ class WaitcntBrackets {
   bool counterOutOfOrder(InstCounterType T) const;
   void simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const;
   void simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
-  void determineWait(InstCounterType T, unsigned ScoreToWait,
-                     AMDGPU::Waitcnt &Wait) const;
+  void determineWait(InstCounterType T, int RegNo, AMDGPU::Waitcnt &Wait) const;
   void applyWaitcnt(const AMDGPU::Waitcnt &Wait);
   void applyWaitcnt(InstCounterType T, unsigned Count);
   void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
                      const MachineRegisterInfo *MRI, WaitEventType E,
                      MachineInstr &MI);
 
-  bool hasPending() const { return PendingEvents != 0; }
-  bool hasPendingEvent(WaitEventType E) const {
+  unsigned hasPendingEvent() const { return PendingEvents; }
+  unsigned hasPendingEvent(WaitEventType E) const {
     return PendingEvents & (1 << E);
   }
+  unsigned hasPendingEvent(InstCounterType T) const {
+    unsigned HasPending = PendingEvents & WaitEventMaskForInst[T];
+    assert((HasPending != 0) == (getScoreRange(T) != 0));
+    return HasPending;
+  }
 
   bool hasMixedPendingEvents(InstCounterType T) const {
-    unsigned Events = PendingEvents & WaitEventMaskForInst[T];
+    unsigned Events = hasPendingEvent(T);
     // Return true if more than one bit is set in Events.
     return Events & (Events - 1);
   }
@@ -304,11 +309,12 @@ class WaitcntBrackets {
   void setScoreUB(InstCounterType T, unsigned Val) {
     assert(T < NUM_INST_CNTS);
     ScoreUBs[T] = Val;
-    if (T == EXP_CNT) {
-      unsigned UB = ScoreUBs[T] - getWaitCountMax(EXP_CNT);
-      if (ScoreLBs[T] < UB && UB < ScoreUBs[T])
-        ScoreLBs[T] = UB;
-    }
+
+    if (T != EXP_CNT)
+      return;
+
+    if (getScoreRange(EXP_CNT) > getWaitCountMax(EXP_CNT))
+      ScoreLBs[EXP_CNT] = ScoreUBs[EXP_CNT] - getWaitCountMax(EXP_CNT);
   }
 
   void setRegScore(int GprNo, InstCounterType T, unsigned Val) {
@@ -694,29 +700,30 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
 void WaitcntBrackets::print(raw_ostream &OS) {
   OS << '\n';
   for (auto T : inst_counter_types()) {
-    unsigned LB = getScoreLB(T);
-    unsigned UB = getScoreUB(T);
+    unsigned SR = getScoreRange(T);
 
     switch (T) {
     case VM_CNT:
-      OS << "    VM_CNT(" << UB - LB << "): ";
+      OS << "    VM_CNT(" << SR << "): ";
       break;
     case LGKM_CNT:
-      OS << "    LGKM_CNT(" << UB - LB << "): ";
+      OS << "    LGKM_CNT(" << SR << "): ";
       break;
     case EXP_CNT:
-      OS << "    EXP_CNT(" << UB - LB << "): ";
+      OS << "    EXP_CNT(" << SR << "): ";
       break;
     case VS_CNT:
-      OS << "    VS_CNT(" << UB - LB << "): ";
+      OS << "    VS_CNT(" << SR << "): ";
       break;
     default:
-      OS << "    UNKNOWN(" << UB - LB << "): ";
+      OS << "    UNKNOWN(" << SR << "): ";
       break;
     }
 
-    if (LB < UB) {
+    if (SR != 0) {
       // Print vgpr scores.
+      unsigned LB = getScoreLB(T);
+
       for (int J = 0; J <= VgprUB; J++) {
         unsigned RegScore = getRegScore(J, T);
         if (RegScore <= LB)
@@ -755,18 +762,17 @@ void WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) const {
 
 void WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
                                       unsigned &Count) const {
-  const unsigned LB = getScoreLB(T);
-  const unsigned UB = getScoreUB(T);
-
   // The number of outstanding events for this type, T, can be calculated
   // as (UB - LB). If the current Count is greater than or equal to the number
   // of outstanding events, then the wait for this counter is redundant.
-  if (Count >= UB - LB)
+  if (Count >= getScoreRange(T))
     Count = ~0u;
 }
 
-void WaitcntBrackets::determineWait(InstCounterType T, unsigned ScoreToWait,
+void WaitcntBrackets::determineWait(InstCounterType T, int RegNo,
                                     AMDGPU::Waitcnt &Wait) const {
+  unsigned ScoreToWait = getRegScore(RegNo, T);
+
   // If the score of src_operand falls within the bracket, we need an
   // s_waitcnt instruction.
   const unsigned LB = getScoreLB(T);
@@ -1106,8 +1112,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
 
         for (int RegNo = CallAddrOpInterval.first;
              RegNo < CallAddrOpInterval.second; ++RegNo)
-          ScoreBrackets.determineWait(
-            LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+          ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait);
 
         int RtnAddrOpIdx =
           AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
@@ -1117,8 +1122,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
 
           for (int RegNo = RtnAddrOpInterval.first;
                RegNo < RtnAddrOpInterval.second; ++RegNo)
-            ScoreBrackets.determineWait(
-              LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+            ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait);
         }
       }
     } else {
@@ -1150,11 +1154,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
           continue;
         unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
         // VM_CNT is only relevant to vgpr or LDS.
-        ScoreBrackets.determineWait(
-            VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+        ScoreBrackets.determineWait(VM_CNT, RegNo, Wait);
         if (Memop->isStore()) {
-          ScoreBrackets.determineWait(
-              EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+          ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait);
         }
       }
 
@@ -1176,17 +1178,14 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
             if (Op.isUse() || !SIInstrInfo::isVMEM(MI) ||
                 ScoreBrackets.hasOtherPendingVmemTypes(RegNo,
                                                        getVmemType(MI))) {
-              ScoreBrackets.determineWait(
-                  VM_CNT, ScoreBrackets.getRegScore(RegNo, VM_CNT), Wait);
+              ScoreBrackets.determineWait(VM_CNT, RegNo, Wait);
               ScoreBrackets.clearVgprVmemTypes(RegNo);
             }
             if (Op.isDef() || ScoreBrackets.hasPendingEvent(EXP_LDS_ACCESS)) {
-              ScoreBrackets.determineWait(
-                  EXP_CNT, ScoreBrackets.getRegScore(RegNo, EXP_CNT), Wait);
+              ScoreBrackets.determineWait(EXP_CNT, RegNo, Wait);
             }
           }
-          ScoreBrackets.determineWait(
-              LGKM_CNT, ScoreBrackets.getRegScore(RegNo, LGKM_CNT), Wait);
+          ScoreBrackets.determineWait(LGKM_CNT, RegNo, Wait);
         }
       }
     }
@@ -1205,9 +1204,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
   //       after fixing the scheduler. Also, the Shader Compiler code is
   //       independent of target.
   if (readsVCCZ(MI) && ST->hasReadVCCZBug()) {
-    if (ScoreBrackets.getScoreLB(LGKM_CNT) <
-            ScoreBrackets.getScoreUB(LGKM_CNT) &&
-        ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
+    if (ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
       Wait.LgkmCnt = 0;
     }
   }
@@ -1228,9 +1225,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
     Wait.VsCnt = 0;
 
   if (FlushVmCnt) {
-    unsigned UB = ScoreBrackets.getScoreUB(VM_CNT);
-    unsigned LB = ScoreBrackets.getScoreLB(VM_CNT);
-    if (UB - LB != 0)
+    if (ScoreBrackets.hasPendingEvent(VM_CNT))
       Wait.VmCnt = 0;
   }
 
@@ -1245,9 +1240,7 @@ bool SIInsertWaitcnts::generateWaitcntBlockEnd(MachineBasicBlock &Block,
                                                MachineInstr *OldWaitcntInstr) {
   AMDGPU::Waitcnt Wait;
 
-  unsigned UB = ScoreBrackets.getScoreUB(VM_CNT);
-  unsigned LB = ScoreBrackets.getScoreLB(VM_CNT);
-  if (UB - LB == 0)
+  if (!ScoreBrackets.hasPendingEvent(VM_CNT))
     return false;
 
   Wait.VmCnt = 0;
@@ -1603,8 +1596,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
         // 2. Restore the correct value of vccz by writing the current value
         //    of vcc back to vcc.
         if (ST->hasReadVCCZBug() &&
-            ScoreBrackets.getScoreLB(LGKM_CNT) <
-                ScoreBrackets.getScoreUB(LGKM_CNT) &&
             ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
           // Writes to vcc while there's an outstanding smem read may get
           // clobbered as soon as any read completes.
@@ -1848,7 +1839,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
       Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
       BI.Dirty = false;
 
-      if (Brackets->hasPending()) {
+      if (Brackets->hasPendingEvent()) {
         BlockInfo *MoveBracketsToSucc = nullptr;
         for (MachineBasicBlock *Succ : BI.MBB->successors()) {
           auto SuccBII = BlockInfos.find(Succ);


        


More information about the llvm-commits mailing list