[PATCH] D20556: AMDGPU: Skip waiting on lgkmcnt for global flat loads

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon May 23 21:18:48 PDT 2016


arsenm created this revision.
arsenm added a reviewer: tstellarAMD.
arsenm added a subscriber: llvm-commits.
Herald added subscribers: kzhuravl, arsenm.

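If we know that a FLAT instruction's access is to global (or constant)
memory rather than to a generic flat address, it cannot touch LDS, so
waiting on lgkmcnt is not necessary; waiting on vmcnt is sufficient.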

http://reviews.llvm.org/D20556

Files:
  lib/Target/AMDGPU/SIInsertWaits.cpp
  test/CodeGen/AMDGPU/waitcnt-flat.ll

Index: test/CodeGen/AMDGPU/waitcnt-flat.ll
===================================================================
--- test/CodeGen/AMDGPU/waitcnt-flat.ll
+++ test/CodeGen/AMDGPU/waitcnt-flat.ll
@@ -5,12 +5,52 @@
 ; operand, this test is not broken.  It just means it is no longer testing
 ; for the original bug.
 
-; GCN: {{^}}test:
+; GCN-LABEL: {{^}}global_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_load_dword
+
+; Test pointer problem
 ; XGCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]]
 ; XGCN: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; XGCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}]
-define void @test(i32 addrspace(1)* %out, i32 %in) {
+define void @global_test(i32 addrspace(1)* %out, i32 %in) {
   store volatile i32 0, i32 addrspace(1)* %out
   %val = load volatile i32, i32 addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: {{^}}flat_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword
+define void @flat_test(i32 addrspace(4)* %out, i32 %in) {
+  store volatile i32 0, i32 addrspace(4)* %out
+  %val = load volatile i32, i32 addrspace(4)* %out
+  ret void
+}
+
+; If the store is not through a generic pointer, the lgkmcnt is not
+; needed.
+
+; GCN-LABEL: {{^}}global_flat_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_load_dword
+define void @global_flat_test(i32 addrspace(1)* %out, i32 %in) {
+  store volatile i32 0, i32 addrspace(1)* %out
+  %out.cast = addrspacecast i32 addrspace(1)* %out to i32 addrspace(4)*
+  %val = load volatile i32, i32 addrspace(4)* %out.cast
+  ret void
+}
+
+; GCN-LABEL: {{^}}flat_global_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword
+define void @flat_global_test(i32 addrspace(1)* %out, i32 %in) {
+  %out.cast = addrspacecast i32 addrspace(1)* %out to i32 addrspace(4)*
+  store volatile i32 0, i32 addrspace(4)* %out.cast
+  %val = load volatile i32, i32 addrspace(1)* %out
+  ret void
+}
Index: lib/Target/AMDGPU/SIInsertWaits.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaits.cpp
+++ lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -179,6 +179,16 @@
   return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
 }
 
+static bool hasGlobalMemOperand(const MachineInstr &MI) {
+  if (!MI.hasOneMemOperand())
+    return false;
+
+  MachineMemOperand *MMO = *MI.memoperands_begin();
+  unsigned AS = MMO->getAddrSpace();
+  return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+         AS == AMDGPUAS::CONSTANT_ADDRESS;
+}
+
 Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
   uint64_t TSFlags = MI.getDesc().TSFlags;
   Counters Result = { { 0, 0, 0 } };
@@ -208,6 +218,11 @@
         // XXX - What is the right value?
         Result.Named.LGKM = 1;
       }
+    } else if (TII->isFLAT(MI)) {
+      // If we know the pointer is not accessing a flat address, we don't need
+      // to wait for lgkm.
+      if (!hasGlobalMemOperand(MI))
+        Result.Named.LGKM = 1;
     } else {
       // DS
       Result.Named.LGKM = 1;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20556.58192.patch
Type: text/x-patch
Size: 3178 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160524/0c103e9e/attachment.bin>


More information about the llvm-commits mailing list