[PATCH] R600/SI: Insert s_waitcnt before s_barrier instructions.

Tom Stellard thomas.stellard at amd.com
Mon Dec 29 12:14:25 PST 2014


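This ensures that each thread's outstanding memory operations have
completed before it executes the barrier, so that all memory operations
are complete once every thread has reached the barrier.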
---
 lib/Target/R600/SIInsertWaits.cpp               | 6 +++++-
 test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll | 4 +++-
 test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll  | 5 ++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 2e56508..181b116 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -428,7 +428,11 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E; ++I) {
 
-      Changes |= insertWait(MBB, I, handleOperands(*I));
+      // Wait for everything before a barrier.
+      if (I->getOpcode() == AMDGPU::S_BARRIER)
+        Changes |= insertWait(MBB, I, LastIssued);
+      else
+        Changes |= insertWait(MBB, I, handleOperands(*I));
       pushInstruction(MBB, I);
     }
 
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
index 98f6695..b20457e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
@@ -3,7 +3,9 @@
 
 ; FUNC-LABEL: {{^}}test_barrier_global:
 ; EG: GROUP_BARRIER
-; SI: s_barrier
+; SI: buffer_store_dword
+; SI: s_waitcnt
+; SI-NEXT: s_barrier
 
 define void @test_barrier_global(i32 addrspace(1)* %out) {
 entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
index 92fe9f2..ca3c8b8 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -3,7 +3,10 @@
 
 ; FUNC-LABEL: {{^}}test_barrier_local:
 ; EG: GROUP_BARRIER
-; SI: s_barrier
+
+; SI: buffer_store_dword
+; SI: s_waitcnt
+; SI-NEXT: s_barrier
 
 define void @test_barrier_local(i32 addrspace(1)* %out) {
 entry:
-- 
1.8.5.5




More information about the llvm-commits mailing list