[PATCH] D46616: [AMDGPU][Waitcnt] Fix handling of flat instrs
Mark Searles via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue May 8 18:49:54 PDT 2018
msearles updated this revision to Diff 145838.
msearles added a comment.
Update comment as suggested by reviewer.
https://reviews.llvm.org/D46616
Files:
lib/Target/AMDGPU/AMDGPUSubtarget.h
lib/Target/AMDGPU/SIInsertWaitcnts.cpp
test/CodeGen/AMDGPU/waitcnt.mir
Index: test/CodeGen/AMDGPU/waitcnt.mir
===================================================================
--- test/CodeGen/AMDGPU/waitcnt.mir
+++ test/CodeGen/AMDGPU/waitcnt.mir
@@ -30,22 +30,14 @@
# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
-# CHECK: S_WAITCNT 368
+# CHECK: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4
-# The first load has no mem operand, so we should assume it accesses the flat
-# address space.
-# s_waitcnt lgkmcnt(1)
-# CHECK-NEXT: S_WAITCNT 383
# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
-# CHECK: S_WAITCNT 368
+# CHECK: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4
-# One outstanding load accesses the flat address space.
-# s_waitcnt lgkmcnt(1)
-# CHECK-NEXT: S_WAITCNT 383
-
name: flat_zero_waitcnt
body: |
Index: lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -136,7 +136,7 @@
// "s_waitcnt 0" before use.
class BlockWaitcntBrackets {
public:
- BlockWaitcntBrackets() {
+ BlockWaitcntBrackets(const SISubtarget *SubTarget) : ST(SubTarget) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
@@ -314,6 +314,7 @@
void dump() { print(dbgs()); }
private:
+ const SISubtarget *ST = nullptr;
bool WaitAtBeginning = false;
bool RevisitLoop = false;
bool MixedExpTypes = false;
@@ -737,9 +738,12 @@
const int32_t LB = getScoreLB(T);
const int32_t UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
- if (T == VM_CNT && hasPendingFlat()) {
- // If there is a pending FLAT operation, and this is a VM waitcnt,
- // then we need to force a waitcnt 0 for VM.
+ if ((T == VM_CNT || T == LGKM_CNT) &&
+ hasPendingFlat() &&
+ !ST->hasFlatLgkmVMemCountInOrder()) {
+ // If there is a pending FLAT operation, and this is a VMem or LGKM
+ // waitcnt and the target can report early completion, then we need
+ // to force a waitcnt 0.
NeedWait = CNT_MASK(T);
setScoreLB(T, getScoreUB(T));
} else if (counterOutOfOrder(T)) {
@@ -1202,7 +1206,7 @@
if (!ScoreBracket) {
assert(!BlockVisitedSet.count(TBB));
BlockWaitcntBracketsMap[TBB] =
- llvm::make_unique<BlockWaitcntBrackets>();
+ llvm::make_unique<BlockWaitcntBrackets>(ST);
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
}
ScoreBracket->setRevisitLoop(true);
@@ -1877,7 +1881,7 @@
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
if (!ScoreBrackets) {
- BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>();
+ BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(ST);
ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
}
ScoreBrackets->setPostOrder(MBB.getNumber());
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -476,6 +476,10 @@
return FlatScratchInsts;
}
+ bool hasFlatLgkmVMemCountInOrder() const {
+ return getGeneration() > GFX9;
+ }
+
bool hasD16LoadStore() const {
return getGeneration() >= GFX9;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D46616.145838.patch
Type: text/x-patch
Size: 3449 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180509/bb5315cc/attachment.bin>
More information about the llvm-commits mailing list