[llvm] r330862 - [AMDGPU] Waitcnt pass: add debug options
Mark Searles via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 25 12:21:26 PDT 2018
Author: msearles
Date: Wed Apr 25 12:21:26 2018
New Revision: 330862
URL: http://llvm.org/viewvc/llvm-project?rev=330862&view=rev
Log:
[AMDGPU] Waitcnt pass: add debug options
- Add "amdgpu-waitcnt-forcezero" to force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
- Add debug counters to control force emit of s_waitcnt instrs; debug counters:
si-insert-waitcnts-forceexp: force emit s_waitcnt expcnt(0) instrs
si-insert-waitcnts-forcevm: force emit s_waitcnt vmcnt(0) instrs
si-insert-waitcnts-forcelgkm: force emit s_waitcnt lgkmcnt(0) instrs
- Add some debug statements
Note that a variant of this patch was previously committed/reverted.
Differential Revision: https://reviews.llvm.org/D45888
Added:
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=330862&r1=330861&r2=330862&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed Apr 25 12:21:26 2018
@@ -40,6 +40,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -50,9 +51,21 @@
#include <utility>
#include <vector>
+using namespace llvm;
+
#define DEBUG_TYPE "si-insert-waitcnts"
-using namespace llvm;
+DEBUG_COUNTER(ForceExpCounter, DEBUG_TYPE"-forceexp",
+ "Force emit s_waitcnt expcnt(0) instrs");
+DEBUG_COUNTER(ForceLgkmCounter, DEBUG_TYPE"-forcelgkm",
+ "Force emit s_waitcnt lgkmcnt(0) instrs");
+DEBUG_COUNTER(ForceVMCounter, DEBUG_TYPE"-forcevm",
+ "Force emit s_waitcnt vmcnt(0) instrs");
+
+static cl::opt<unsigned> ForceEmitZeroFlag(
+ "amdgpu-waitcnt-forcezero",
+ cl::desc("Force all waitcnt instrs to be emitted as s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"),
+ cl::init(0), cl::Hidden);
namespace {
@@ -373,6 +386,9 @@ private:
std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
+ bool ForceEmitZeroWaitcnt;
+ bool ForceEmitWaitcnt[NUM_INST_CNTS];
+
public:
static char ID;
@@ -397,6 +413,41 @@ public:
llvm::make_unique<BlockWaitcntBrackets>(*Bracket));
}
+ bool isForceEmitWaitcnt() const {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1))
+ if (ForceEmitWaitcnt[T])
+ return true;
+ return false;
+ }
+
+ void setForceEmitWaitcnt() {
+// For non-debug builds, ForceEmitWaitcnt has been initialized to false;
+// For debug builds, get the debug counter info and adjust if need be
+#ifndef NDEBUG
+ if (DebugCounter::isCounterSet(ForceExpCounter) &&
+ DebugCounter::shouldExecute(ForceExpCounter)) {
+ ForceEmitWaitcnt[EXP_CNT] = true;
+ } else {
+ ForceEmitWaitcnt[EXP_CNT] = false;
+ }
+
+ if (DebugCounter::isCounterSet(ForceLgkmCounter) &&
+ DebugCounter::shouldExecute(ForceLgkmCounter)) {
+ ForceEmitWaitcnt[LGKM_CNT] = true;
+ } else {
+ ForceEmitWaitcnt[LGKM_CNT] = false;
+ }
+
+ if (DebugCounter::isCounterSet(ForceVMCounter) &&
+ DebugCounter::shouldExecute(ForceVMCounter)) {
+ ForceEmitWaitcnt[VM_CNT] = true;
+ } else {
+ ForceEmitWaitcnt[VM_CNT] = false;
+ }
+#endif // NDEBUG
+ }
+
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
void generateWaitcntInstBefore(MachineInstr &MI,
BlockWaitcntBrackets *ScoreBrackets);
@@ -836,6 +887,10 @@ void SIInsertWaitcnts::generateWaitcntIn
// sc-merge. It is not a coincident that all these cases correspond to the
// instructions that are skipped in the assembling loop.
bool NeedLineMapping = false; // TODO: Check on this.
+ setForceEmitWaitcnt();
+
+ bool IsForceEmitWaitcnt = isForceEmitWaitcnt();
+
if (MI.isDebugValue() &&
// TODO: any other opcode?
!NeedLineMapping) {
@@ -1047,9 +1102,6 @@ void SIInsertWaitcnts::generateWaitcntIn
} // End of for loop that looks at all dest operands.
}
- // TODO: Tie force zero to a compiler triage option.
- bool ForceZero = false;
-
// Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0
// occurs before the instruction. Doing it here prevents any additional
// S_WAITCNTs from being emitted if the instruction was marked as
@@ -1076,17 +1128,17 @@ void SIInsertWaitcnts::generateWaitcntIn
// block, so if we only wait on LGKM here, we might end up with
// another s_waitcnt inserted right after this if there are non-LGKM
// instructions still outstanding.
- ForceZero = true;
+ ForceEmitZeroWaitcnt = true;
EmitWaitcnt = true;
}
}
// Does this operand processing indicate s_wait counter update?
- if (EmitWaitcnt) {
+ if (EmitWaitcnt || IsForceEmitWaitcnt) {
int CntVal[NUM_INST_CNTS];
bool UseDefaultWaitcntStrategy = true;
- if (ForceZero) {
+ if (ForceEmitZeroWaitcnt) {
// Force all waitcnts to 0.
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
@@ -1123,7 +1175,7 @@ void SIInsertWaitcnts::generateWaitcntIn
}
// If we are not waiting on any counter we can skip the wait altogether.
- if (EmitWaitcnt != 0) {
+ if (EmitWaitcnt != 0 || IsForceEmitWaitcnt) {
MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
if (!OldWaitcnt ||
@@ -1151,8 +1203,10 @@ void SIInsertWaitcnts::generateWaitcntIn
}
// Update an existing waitcount, or make a new one.
- unsigned Enc = AMDGPU::encodeWaitcnt(IV, CntVal[VM_CNT],
- CntVal[EXP_CNT], CntVal[LGKM_CNT]);
+ unsigned Enc = AMDGPU::encodeWaitcnt(IV,
+ ForceEmitWaitcnt[VM_CNT] ? 0 : CntVal[VM_CNT],
+ ForceEmitWaitcnt[EXP_CNT] ? 0 : CntVal[EXP_CNT],
+ ForceEmitWaitcnt[LGKM_CNT] ? 0 : CntVal[LGKM_CNT]);
// We don't remove waitcnts that existed prior to the waitcnt
// pass. Check if the waitcnt to-be-inserted can be avoided
// or if the prev waitcnt can be updated.
@@ -1178,6 +1232,11 @@ void SIInsertWaitcnts::generateWaitcntIn
}
if (insertSWaitInst) {
if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
+ if (ForceEmitZeroWaitcnt)
+ DEBUG(dbgs() << "Force emit s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)\n");
+ if (IsForceEmitWaitcnt)
+ DEBUG(dbgs() << "Force emit a s_waitcnt due to debug counter\n");
+
OldWaitcnt->getOperand(0).setImm(Enc);
if (!OldWaitcnt->getParent())
MI.getParent()->insert(MI, OldWaitcnt);
@@ -1603,7 +1662,7 @@ void SIInsertWaitcnts::insertWaitcntInBl
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
DEBUG({
- dbgs() << "Block" << Block.getNumber();
+ dbgs() << "*** Block" << Block.getNumber() << " ***";
ScoreBrackets->dump();
});
@@ -1769,6 +1828,11 @@ bool SIInsertWaitcnts::runOnMachineFunct
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
AMDGPUASI = ST->getAMDGPUAS();
+ ForceEmitZeroWaitcnt = ForceEmitZeroFlag;
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1))
+ ForceEmitWaitcnt[T] = false;
+
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
Added: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir?rev=330862&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-debug.mir Wed Apr 25 12:21:26 2018
@@ -0,0 +1,41 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -debug-counter=si-insert-waitcnts-forcelgkm-count=1 -o - %s | FileCheck -check-prefixes=GCN,LGKM %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -debug-counter=si-insert-waitcnts-forceexp-count=2 -o - %s | FileCheck -check-prefixes=GCN,EXP %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -debug-counter=si-insert-waitcnts-forcevm-count=3 -o - %s | FileCheck -check-prefixes=GCN,VM %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 -debug-counter=si-insert-waitcnts-forcevm-count=2 -o - %s | FileCheck -check-prefixes=GCN,ZERO %s
+
+# check that the waitcnt pass options that force insertion of waitcnt instructions are working as expected
+
+...
+# GCN-LABEL: name: waitcnt-debug
+# LGKM: S_WAITCNT 127
+# LGKM-NEXT: S_NOP 0
+# LGKM-NEXT: S_NOP 0
+
+# EXP: S_WAITCNT 3855
+# EXP-NEXT: S_NOP 0
+# EXP-NEXT: S_WAITCNT 3855
+# EXP-NEXT: S_NOP 0
+
+# VM: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+# VM-NEXT: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+# VM-NEXT: S_WAITCNT 3952
+# VM-NEXT: S_NOP 0
+
+# ZERO: S_WAITCNT 0
+# ZERO-NEXT: S_WAITCNT 0
+# ZERO-NEXT: S_NOP 0
+# ZERO-NEXT: S_WAITCNT 0
+# ZERO-NEXT: S_NOP 0
+
+name: waitcnt-debug
+liveins:
+body: |
+ bb.0:
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+ S_NOP 0
+...
More information about the llvm-commits mailing list