[llvm] r304571 - AMDGPU: Make auto waitcnt before barrier a feature
Konstantin Zhuravlyov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 2 10:40:27 PDT 2017
Author: kzhuravl
Date: Fri Jun 2 12:40:26 2017
New Revision: 304571
URL: http://llvm.org/viewvc/llvm-project?rev=304571&view=rev
Log:
AMDGPU: Make auto waitcnt before barrier a feature
Differential Revision: https://reviews.llvm.org/D33793
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=304571&r1=304570&r2=304571&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Fri Jun 2 12:40:26 2017
@@ -365,6 +365,13 @@ def FeatureFlatForGlobal : SubtargetFeat
"Force to generate flat instruction for global"
>;
+def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
+ "auto-waitcnt-before-barrier",
+ "AutoWaitcntBeforeBarrier",
+ "true",
+ "Hardware automatically inserts waitcnt before barrier"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=304571&r1=304570&r2=304571&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Fri Jun 2 12:40:26 2017
@@ -91,6 +91,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
FPExceptions(false),
DX10Clamp(false),
FlatForGlobal(false),
+ AutoWaitcntBeforeBarrier(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=304571&r1=304570&r2=304571&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Jun 2 12:40:26 2017
@@ -110,6 +110,7 @@ protected:
bool FPExceptions;
bool DX10Clamp;
bool FlatForGlobal;
+ bool AutoWaitcntBeforeBarrier;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool HasApertureRegs;
@@ -364,6 +365,10 @@ public:
return FlatForGlobal;
}
+ bool hasAutoWaitcntBeforeBarrier() const {
+ return AutoWaitcntBeforeBarrier;
+ }
+
bool hasUnalignedBufferAccess() const {
return UnalignedBufferAccess;
}
@@ -728,12 +733,6 @@ public:
/// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
- /// \returns True if waitcnt instruction is needed before barrier instruction,
- /// false otherwise.
- bool needWaitcntBeforeBarrier() const {
- return true;
- }
-
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=304571&r1=304570&r2=304571&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Fri Jun 2 12:40:26 2017
@@ -1009,7 +1009,8 @@ MachineInstr *SIInsertWaitcnts::generate
// occurs before the instruction. Doing it here prevents any additional
// S_WAITCNTs from being emitted if the instruction was marked as
// requiring a WAITCNT beforehand.
- if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) {
+ if (MI.getOpcode() == AMDGPU::S_BARRIER &&
+ !ST->hasAutoWaitcntBeforeBarrier()) {
EmitSwaitcnt |=
ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
EmitSwaitcnt |= ScoreBrackets->updateByWait(
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp?rev=304571&r1=304570&r2=304571&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp Fri Jun 2 12:40:26 2017
@@ -630,7 +630,7 @@ bool SIInsertWaits::runOnMachineFunction
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
- ST->needWaitcntBeforeBarrier()) ||
+ !ST->hasAutoWaitcntBeforeBarrier()) ||
I->getOpcode() == AMDGPU::S_SENDMSG ||
I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll?rev=304571&r1=304570&r2=304571&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll Fri Jun 2 12:40:26 2017
@@ -1,10 +1,13 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=NOAUTO %s
+; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=AUTO %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=NOAUTO %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=AUTO %s
; GCN-LABEL: {{^}}test_barrier:
; GFX8: buffer_store_dword
; GFX9: flat_store_dword
-; GCN: s_waitcnt
+; NOAUTO: s_waitcnt
+; AUTO-NOT: s_waitcnt
; GCN: s_barrier
define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
entry:
More information about the llvm-commits mailing list