[llvm] r333926 - [AMDGPU][Waitcnt] Fix handling of flat instrs
Mark Searles via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 4 09:51:59 PDT 2018
Author: msearles
Date: Mon Jun 4 09:51:59 2018
New Revision: 333926
URL: http://llvm.org/viewvc/llvm-project?rev=333926&view=rev
Log:
[AMDGPU][Waitcnt] Fix handling of flat instrs
On GFX9 and earlier, flat memory ops may decrement both VMCNT and LGKMCNT out of order, so while a flat op is pending, any required wait on either counter must be a wait for zero.
Differential Revision: https://reviews.llvm.org/D46616
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=333926&r1=333925&r2=333926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Jun 4 09:51:59 2018
@@ -472,6 +472,10 @@ public:
return FlatScratchInsts;
}
+ bool hasFlatLgkmVMemCountInOrder() const {
+ return getGeneration() > GFX9;
+ }
+
bool hasD16LoadStore() const {
return getGeneration() >= GFX9;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=333926&r1=333925&r2=333926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Mon Jun 4 09:51:59 2018
@@ -136,7 +136,7 @@ enum RegisterMapping {
// "s_waitcnt 0" before use.
class BlockWaitcntBrackets {
public:
- BlockWaitcntBrackets() {
+ BlockWaitcntBrackets(const SISubtarget *SubTarget) : ST(SubTarget) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
memset(VgprScores[T], 0, sizeof(VgprScores[T]));
@@ -314,6 +314,7 @@ public:
void dump() { print(dbgs()); }
private:
+ const SISubtarget *ST = nullptr;
bool WaitAtBeginning = false;
bool RevisitLoop = false;
bool MixedExpTypes = false;
@@ -735,9 +736,12 @@ unsigned int BlockWaitcntBrackets::updat
const int32_t LB = getScoreLB(T);
const int32_t UB = getScoreUB(T);
if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
- if (T == VM_CNT && hasPendingFlat()) {
- // If there is a pending FLAT operation, and this is a VM waitcnt,
- // then we need to force a waitcnt 0 for VM.
+ if ((T == VM_CNT || T == LGKM_CNT) &&
+ hasPendingFlat() &&
+ !ST->hasFlatLgkmVMemCountInOrder()) {
+ // If there is a pending FLAT operation, and this is a VMem or LGKM
+ // waitcnt and the target can report early completion, then we need
+ // to force a waitcnt 0.
NeedWait = CNT_MASK(T);
setScoreLB(T, getScoreUB(T));
} else if (counterOutOfOrder(T)) {
@@ -1200,7 +1204,7 @@ void SIInsertWaitcnts::generateWaitcntIn
if (!ScoreBracket) {
assert(!BlockVisitedSet.count(TBB));
BlockWaitcntBracketsMap[TBB] =
- llvm::make_unique<BlockWaitcntBrackets>();
+ llvm::make_unique<BlockWaitcntBrackets>(ST);
ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
}
ScoreBracket->setRevisitLoop(true);
@@ -1879,7 +1883,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
if (!ScoreBrackets) {
- BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>();
+ BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(ST);
ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
}
ScoreBrackets->setPostOrder(MBB.getNumber());
Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir?rev=333926&r1=333925&r2=333926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt.mir Mon Jun 4 09:51:59 2018
@@ -1,4 +1,5 @@
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waitcnts %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX89 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX89 %s
--- |
define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
@@ -30,22 +31,14 @@
# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
-# CHECK: S_WAITCNT 368
+# GFX89: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4
-# The first load has no mem operand, so we should assume it accesses the flat
-# address space.
-# s_waitcnt lgkmcnt(1)
-# CHECK-NEXT: S_WAITCNT 383
# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
-# CHECK: S_WAITCNT 368
+# GFX89: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4
-# One outstanding load accesses the flat address space.
-# s_waitcnt lgkmcnt(1)
-# CHECK-NEXT: S_WAITCNT 383
-
name: flat_zero_waitcnt
body: |
More information about the llvm-commits mailing list