[PATCH] D46616: [AMDGPU][Waitcnt] Fix handling of flat instrs
Mark Searles via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue May 8 17:43:09 PDT 2018
msearles created this revision.
msearles added reviewers: arsenm, rampitec.
msearles added a project: AMDGPU.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, kzhuravl.
On GFX9 and earlier, flat memory operations may decrement both VMCNT and LGKMCNT out of order.
https://reviews.llvm.org/D46616
Files:
lib/Target/AMDGPU/AMDGPUSubtarget.h
lib/Target/AMDGPU/SIInsertWaitcnts.cpp
test/CodeGen/AMDGPU/waitcnt.mir
Index: test/CodeGen/AMDGPU/waitcnt.mir
===================================================================
--- test/CodeGen/AMDGPU/waitcnt.mir
+++ test/CodeGen/AMDGPU/waitcnt.mir
@@ -30,22 +30,14 @@
# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
-# CHECK: S_WAITCNT 368
+# CHECK: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4
-# The first load has no mem operand, so we should assume it accesses the flat
-# address space.
-# s_waitcnt lgkmcnt(1)
-# CHECK-NEXT: S_WAITCNT 383
# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
-# CHECK: S_WAITCNT 368
+# CHECK: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4
-# One outstanding load accesses the flat address space.
-# s_waitcnt lgkmcnt(1)
-# CHECK-NEXT: S_WAITCNT 383
-
name: flat_zero_waitcnt
body: |
Index: lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -136,7 +136,7 @@
// "s_waitcnt 0" before use.
class BlockWaitcntBrackets {
public:
- BlockWaitcntBrackets() {
+ BlockWaitcntBrackets(const SISubtarget *SubTarget) : ST(SubTarget) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
@@ -314,6 +314,7 @@
void dump() { print(dbgs()); }
private:
+ const SISubtarget *ST = nullptr;
bool WaitAtBeginning = false;
bool RevisitLoop = false;
bool MixedExpTypes = false;
@@ -737,7 +738,9 @@
const int32_t LB = getScoreLB(T);
const int32_t UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
- if (T == VM_CNT && hasPendingFlat()) {
+ if ((T == VM_CNT || T == LGKM_CNT) &&
+ hasPendingFlat() &&
+ !ST->hasFlatLgkmVMemCountInOrder()) {
// If there is a pending FLAT operation, and this is a VM waitcnt,
// then we need to force a waitcnt 0 for VM.
NeedWait = CNT_MASK(T);
@@ -1202,7 +1205,7 @@
if (!ScoreBracket) {
assert(!BlockVisitedSet.count(TBB));
BlockWaitcntBracketsMap[TBB] =
- llvm::make_unique<BlockWaitcntBrackets>();
+ llvm::make_unique<BlockWaitcntBrackets>(ST);
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
}
ScoreBracket->setRevisitLoop(true);
@@ -1877,7 +1880,7 @@
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
if (!ScoreBrackets) {
- BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>();
+ BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(ST);
ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
}
ScoreBrackets->setPostOrder(MBB.getNumber());
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -476,6 +476,10 @@
return FlatScratchInsts;
}
+ bool hasFlatLgkmVMemCountInOrder() const {
+ return getGeneration() > GFX9;
+ }
+
bool hasD16LoadStore() const {
return getGeneration() >= GFX9;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D46616.145829.patch
Type: text/x-patch
Size: 3189 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180509/c6a7991f/attachment.bin>
More information about the llvm-commits
mailing list