[llvm] r330862 - [AMDGPU] Waitcnt pass: add debug options

Mark Searles via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 25 12:21:26 PDT 2018


Author: msearles
Date: Wed Apr 25 12:21:26 2018
New Revision: 330862

URL: http://llvm.org/viewvc/llvm-project?rev=330862&view=rev
Log:
[AMDGPU] Waitcnt pass: add debug options

- Add "amdgpu-waitcnt-forcezero" to force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)

- Add debug counters to control force emit of s_waitcnt instrs; debug counters:
si-insert-waitcnts-forceexp: force emit s_waitcnt expcnt(0) instrs
si-insert-waitcnts-forcevm: force emit s_waitcnt vmcnt(0) instrs
si-insert-waitcnts-forcelgkm: force emit s_waitcnt lgkmcnt(0) instrs

- Add some debug statements

Note that a variant of this patch was previously committed/reverted.

Differential Revision: https://reviews.llvm.org/D45888

Added:
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=330862&r1=330861&r2=330862&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed Apr 25 12:21:26 2018
@@ -40,6 +40,7 @@
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -50,9 +51,21 @@
 #include <utility>
 #include <vector>
 
+using namespace llvm;
+
 #define DEBUG_TYPE "si-insert-waitcnts"
 
-using namespace llvm;
+DEBUG_COUNTER(ForceExpCounter, DEBUG_TYPE"-forceexp",
+              "Force emit s_waitcnt expcnt(0) instrs");
+DEBUG_COUNTER(ForceLgkmCounter, DEBUG_TYPE"-forcelgkm",
+              "Force emit s_waitcnt lgkmcnt(0) instrs");
+DEBUG_COUNTER(ForceVMCounter, DEBUG_TYPE"-forcevm",
+              "Force emit s_waitcnt vmcnt(0) instrs");
+
+static cl::opt<unsigned> ForceEmitZeroFlag(
+  "amdgpu-waitcnt-forcezero",
+  cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"),
+  cl::init(0), cl::Hidden);
 
 namespace {
 
@@ -373,6 +386,9 @@ private:
 
   std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
 
+  bool ForceEmitZeroWaitcnt;
+  bool ForceEmitWaitcnt[NUM_INST_CNTS];
+
 public:
   static char ID;
 
@@ -397,6 +413,41 @@ public:
         llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
   }
 
+  bool isForceEmitWaitcnt() const {
+    for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+         T = (enum InstCounterType)(T + 1))
+      if (ForceEmitWaitcnt[T])
+        return true;
+    return false;
+  }
+
+  void setForceEmitWaitcnt() {
+// For non-debug builds, ForceEmitWaitcnt has been initialized to false;
+// For debug builds, get the debug counter info and adjust if need be
+#ifndef NDEBUG
+    if (DebugCounter::isCounterSet(ForceExpCounter) &&
+        DebugCounter::shouldExecute(ForceExpCounter)) {
+      ForceEmitWaitcnt[EXP_CNT] = true;
+    } else {
+      ForceEmitWaitcnt[EXP_CNT] = false;
+    }
+
+    if (DebugCounter::isCounterSet(ForceLgkmCounter) &&
+         DebugCounter::shouldExecute(ForceLgkmCounter)) {
+      ForceEmitWaitcnt[LGKM_CNT] = true;
+    } else {
+      ForceEmitWaitcnt[LGKM_CNT] = false;
+    }
+
+    if (DebugCounter::isCounterSet(ForceVMCounter) &&
+        DebugCounter::shouldExecute(ForceVMCounter)) {
+      ForceEmitWaitcnt[VM_CNT] = true;
+    } else {
+      ForceEmitWaitcnt[VM_CNT] = false;
+    }
+#endif // NDEBUG
+  }
+
   bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
   void generateWaitcntInstBefore(MachineInstr &MI,
                                   BlockWaitcntBrackets *ScoreBrackets);
@@ -836,6 +887,10 @@ void SIInsertWaitcnts::generateWaitcntIn
   // sc-merge. It is not a coincident that all these cases correspond to the
   // instructions that are skipped in the assembling loop.
   bool NeedLineMapping = false; // TODO: Check on this.
+  setForceEmitWaitcnt();
+
+  bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
+
   if (MI.isDebugValue() &&
       // TODO: any other opcode?
       !NeedLineMapping) {
@@ -1047,9 +1102,6 @@ void SIInsertWaitcnts::generateWaitcntIn
     } // End of for loop that looks at all dest operands.
   }
 
-  // TODO: Tie force zero to a compiler triage option.
-  bool ForceZero = false;
-
   // Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
   // occurs before the instruction. Doing it here prevents any additional
   // S_WAITCNTs from being emitted if the instruction was marked as
@@ -1076,17 +1128,17 @@ void SIInsertWaitcnts::generateWaitcntIn
       // block, so if we only wait on LGKM here, we might end up with
       // another s_waitcnt inserted right after this if there are non-LGKM
       // instructions still outstanding.
-      ForceZero = true;
+      ForceEmitZeroWaitcnt = true;
       EmitWaitcnt = true;
     }
   }
 
   // Does this operand processing indicate s_wait counter update?
-  if (EmitWaitcnt) {
+  if (EmitWaitcnt || IsForceEmitWaitcnt) {
     int CntVal[NUM_INST_CNTS];
 
     bool UseDefaultWaitcntStrategy = true;
-    if (ForceZero) {
+    if (ForceEmitZeroWaitcnt) {
       // Force all waitcnts to 0.
       for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
            T = (enum InstCounterType)(T + 1)) {
@@ -1123,7 +1175,7 @@ void SIInsertWaitcnts::generateWaitcntIn
     }
 
     // If we are not waiting on any counter we can skip the wait altogether.
-    if (EmitWaitcnt != 0) {
+    if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
       MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
       int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
       if (!OldWaitcnt ||
@@ -1151,8 +1203,10 @@ void SIInsertWaitcnts::generateWaitcntIn
       }
 
       // Update an existing waitcount, or make a new one.
-      unsigned Enc = AMDGPU::encodeWaitcnt(IV, CntVal[VM_CNT],
-                                           CntVal[EXP_CNT], CntVal[LGKM_CNT]);
+      unsigned Enc = AMDGPU::encodeWaitcnt(IV,
+                      ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
+                      ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
+                      ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
       // We don't remove waitcnts that existed prior to the waitcnt
       // pass. Check if the waitcnt to-be-inserted can be avoided
       // or if the prev waitcnt can be updated.
@@ -1178,6 +1232,11 @@ void SIInsertWaitcnts::generateWaitcntIn
       }
       if (insertSWaitInst) {
         if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
+          if (ForceEmitZeroWaitcnt)
+            DEBUG(dbgs() << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
+          if (IsForceEmitWaitcnt)
+            DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
+
           OldWaitcnt->getOperand(0).setImm(Enc);
           if (!OldWaitcnt->getParent())
             MI.getParent()->insert(MI, OldWaitcnt);
@@ -1603,7 +1662,7 @@ void SIInsertWaitcnts::insertWaitcntInBl
   BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
 
   DEBUG({
-    dbgs() << "Block" << Block.getNumber();
+    dbgs() << "*** Block" << Block.getNumber() << " ***";
     ScoreBrackets->dump();
   });
 
@@ -1769,6 +1828,11 @@ bool SIInsertWaitcnts::runOnMachineFunct
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   AMDGPUASI = ST->getAMDGPUAS();
 
+  ForceEmitZeroWaitcnt = ForceEmitZeroFlag;
+  for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+       T = (enum InstCounterType)(T + 1))
+    ForceEmitWaitcnt[T] = false;
+
   HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
   HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
   HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);

Added: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir?rev=330862&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir Wed Apr 25 12:21:26 2018
@@ -0,0 +1,41 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -debug-counter=si-insert-waitcnts-forcelgkm-count=1 -o - %s | FileCheck -check-prefixes=GCN,LGKM %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -debug-counter=si-insert-waitcnts-forceexp-count=2 -o - %s | FileCheck -check-prefixes=GCN,EXP %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -debug-counter=si-insert-waitcnts-forcevm-count=3 -o - %s | FileCheck -check-prefixes=GCN,VM %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 -debug-counter=si-insert-waitcnts-forcevm-count=2 -o - %s | FileCheck -check-prefixes=GCN,ZERO %s
+
+# check that the waitcnt pass options that force insertion of waitcnt instructions are working as expected
+
+...
+# GCN-LABEL: name: waitcnt-debug
+# LGKM: S_WAITCNT 127
+# LGKM-NEXT: S_NOP 0
+# LGKM-NEXT: S_NOP 0
+
+# EXP: S_WAITCNT 3855
+# EXP-NEXT: S_NOP 0
+# EXP-NEXT: S_WAITCNT 3855
+# EXP-NEXT: S_NOP 0
+
+# VM: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+# VM-NEXT: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+# VM-NEXT: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+
+# ZERO: S_WAITCNT 0
+# ZERO-NEXT: S_WAITCNT 0
+# ZERO-NEXT: S_NOP 0
+# ZERO-NEXT: S_WAITCNT 0
+# ZERO-NEXT: S_NOP 0
+
+name:            waitcnt-debug
+liveins:
+body:             |
+  bb.0:
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+...




More information about the llvm-commits mailing list