[llvm] r323788 - [AMDGPU] Add options for waitcnt pass debugging; add instr count in debug output.

Mark Searles via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 30 08:49:38 PST 2018


Author: msearles
Date: Tue Jan 30 08:49:38 2018
New Revision: 323788

URL: http://llvm.org/viewvc/llvm-project?rev=323788&view=rev
Log:
[AMDGPU] Add options for waitcnt pass debugging; add instr count in debug output.
-amdgpu-waitcnt-forcezero={1|0}  Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-amdgpu-waitcnt-forceexp=<n>     Force emission of an s_waitcnt expcnt(0) before the first <n> instrs
-amdgpu-waitcnt-forcelgkm=<n>    Force emission of an s_waitcnt lgkmcnt(0) before the first <n> instrs
-amdgpu-waitcnt-forcevm=<n>      Force emission of an s_waitcnt vmcnt(0) before the first <n> instrs

This patch was originally pushed (abb190fd51cd2f9a9eef08c024e109f7f7e909fc), caused a buildbot failure, and was reverted (6227480d74da507cf8e1b4bcaffbdb9fb875b4b8). This revision reapplies it with the buildbot failures fixed.

Differential Revision: https://reviews.llvm.org/D40091

Added:
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=323788&r1=323787&r2=323788&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Tue Jan 30 08:49:38 2018
@@ -54,6 +54,26 @@
 
 using namespace llvm;
 
+static cl::opt<unsigned> ForceZeroFlag(
+  "amdgpu-waitcnt-forcezero",
+  cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"),
+  cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> ForceExpFlag(
+  "amdgpu-waitcnt-forceexp",
+  cl::desc("Force emit a s_waitcnt expcnt(0) before the first <n> instrs"),
+  cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> ForceLgkmFlag(
+  "amdgpu-waitcnt-forcelgkm",
+  cl::desc("Force emit a s_waitcnt lgkmcnt(0) before the first <n> instrs"),
+  cl::init(0), cl::Hidden);
+
+static cl::opt<unsigned> ForceVmFlag(
+  "amdgpu-waitcnt-forcevm",
+  cl::desc("Force emit a s_waitcnt vmcnt(0) before the first <n> instrs"),
+  cl::init(0), cl::Hidden);
+
 namespace {
 
 // Class of object that encapsulates latest instruction counter score
@@ -373,6 +393,9 @@ private:
 
   std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
 
+  bool ForceZero = false;
+  int32_t ForceSwaitcnt[NUM_INST_CNTS];
+
 public:
   static char ID;
 
@@ -397,6 +420,14 @@ public:
         llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
   }
 
+  bool ForceEmit() const {
+    for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+         T = (enum InstCounterType)(T + 1))
+      if (ForceSwaitcnt[T] > 0)
+        return true;
+    return false;
+  }
+
   bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
   MachineInstr *generateSWaitCntInstBefore(MachineInstr &MI,
                                            BlockWaitcntBrackets *ScoreBrackets);
@@ -1023,9 +1054,6 @@ MachineInstr *SIInsertWaitcnts::generate
     } // End of for loop that looks at all dest operands.
   }
 
-  // TODO: Tie force zero to a compiler triage option.
-  bool ForceZero = false;
-
   // Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
   // occurs before the instruction. Doing it here prevents any additional
   // S_WAITCNTs from being emitted if the instruction was marked as
@@ -1058,7 +1086,7 @@ MachineInstr *SIInsertWaitcnts::generate
   }
 
   // Does this operand processing indicate s_wait counter update?
-  if (EmitSwaitcnt) {
+  if (EmitSwaitcnt || ForceEmit()) {
     int CntVal[NUM_INST_CNTS];
 
     bool UseDefaultWaitcntStrategy = true;
@@ -1099,7 +1127,7 @@ MachineInstr *SIInsertWaitcnts::generate
     }
 
     // If we are not waiting on any counter we can skip the wait altogether.
-    if (EmitSwaitcnt != 0) {
+    if (EmitSwaitcnt != 0 || ForceEmit()) {
       MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
       int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
       if (!OldWaitcnt || (AMDGPU::decodeVmcnt(IV, Imm) !=
@@ -1135,11 +1163,31 @@ MachineInstr *SIInsertWaitcnts::generate
         CompilerGeneratedWaitcntSet.insert(SWaitInst);
       }
 
+      if (!EmitSwaitcnt) {
+        for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+             T = (enum InstCounterType)(T + 1)) {
+          if (ForceSwaitcnt[T] > 0 ) {
+            DEBUG(dbgs() << "ForceSwaitcnt[" << T << "]: "
+                  << ForceSwaitcnt[T] << '\n';);
+          }
+        }
+      }
+
       const MachineOperand &Op =
           MachineOperand::CreateImm(AMDGPU::encodeWaitcnt(
-              IV, CntVal[VM_CNT], CntVal[EXP_CNT], CntVal[LGKM_CNT]));
+              IV,
+              (ForceSwaitcnt[VM_CNT]   > 0) ? 0 : CntVal[VM_CNT],
+              (ForceSwaitcnt[EXP_CNT]  > 0) ? 0 : CntVal[EXP_CNT],
+              (ForceSwaitcnt[LGKM_CNT] > 0) ? 0 : CntVal[LGKM_CNT]));
       SWaitInst->addOperand(MF, Op);
 
+      if (!EmitSwaitcnt) {
+        for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+             T = (enum InstCounterType)(T + 1)) {
+          --ForceSwaitcnt[T];
+        }
+      }
+
       if (CntVal[EXP_CNT] == 0) {
         ScoreBrackets->setMixedExpTypes(false);
       }
@@ -1512,13 +1560,15 @@ MachineBasicBlock *SIInsertWaitcnts::loo
 // Generate s_waitcnt instructions where needed.
 void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
                                             MachineBasicBlock &Block) {
+  static int32_t InstCnt = 0;
+
   // Initialize the state information.
   mergeInputScoreBrackets(Block);
 
   BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
 
   DEBUG({
-    dbgs() << "Block" << Block.getNumber();
+    dbgs() << "*** Block" << Block.getNumber() << " ***";
     ScoreBrackets->dump();
   });
 
@@ -1591,7 +1641,7 @@ void SIInsertWaitcnts::insertWaitcntInBl
       DEBUG({ SWaitInst->print(dbgs() << '\n'); });
     }
     DEBUG({
-      Inst.print(dbgs());
+      dbgs() << "Instr" << ++InstCnt << ": " << Inst;
       ScoreBrackets->dump();
     });
 
@@ -1696,6 +1746,11 @@ bool SIInsertWaitcnts::runOnMachineFunct
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   AMDGPUASI = ST->getAMDGPUAS();
 
+  ForceZero = ForceZeroFlag;
+  ForceSwaitcnt[VM_CNT] = ForceVmFlag;
+  ForceSwaitcnt[EXP_CNT] = ForceExpFlag;
+  ForceSwaitcnt[LGKM_CNT] = ForceLgkmFlag;
+
   HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
   HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
   HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);

Added: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir?rev=323788&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir Tue Jan 30 08:49:38 2018
@@ -0,0 +1,41 @@
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcelgkm=1 -o - %s | FileCheck -check-prefixes=GCN,LGKM %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -amdgpu-waitcnt-forceexp=2 -o - %s | FileCheck -check-prefixes=GCN,EXP %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcevm=3 -o - %s | FileCheck -check-prefixes=GCN,VM %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 -amdgpu-waitcnt-forcevm=2 -o - %s | FileCheck -check-prefixes=GCN,ZERO %s
+
+# Check that the waitcnt pass options that force insertion of waitcnt instructions work as expected.
+
+...
+# GCN-LABEL: name: waitcnt-debug
+# LGKM: S_WAITCNT 127
+# LGKM-NEXT: S_NOP 0
+# LGKM-NEXT: S_NOP 0
+
+# EXP: S_WAITCNT 3855
+# EXP-NEXT: S_NOP 0
+# EXP-NEXT: S_WAITCNT 3855
+# EXP-NEXT: S_NOP 0
+
+# VM: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+# VM-NEXT: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+# VM-NEXT: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+
+# ZERO: S_WAITCNT 0
+# ZERO-NEXT: S_WAITCNT 0
+# ZERO-NEXT: S_NOP 0
+# ZERO-NEXT: S_WAITCNT 0
+# ZERO-NEXT: S_NOP 0
+
+name:            waitcnt-debug
+liveins:
+body:             |
+  bb.0:
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+...
+




More information about the llvm-commits mailing list