[PATCH] D20556: AMDGPU: Skip waiting on lgkmcnt for global flat loads
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon May 23 21:18:48 PDT 2016
arsenm created this revision.
arsenm added a reviewer: tstellarAMD.
arsenm added a subscriber: llvm-commits.
Herald added subscribers: kzhuravl, arsenm.
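If we know that a FLAT instruction's access is not really to a flat (generic)
address, i.e. its memory operand is in the global or constant address space,
then waiting on lgkmcnt (the counter that covers LDS) is not necessary.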
http://reviews.llvm.org/D20556
Files:
lib/Target/AMDGPU/SIInsertWaits.cpp
test/CodeGen/AMDGPU/waitcnt-flat.ll
Index: test/CodeGen/AMDGPU/waitcnt-flat.ll
===================================================================
--- test/CodeGen/AMDGPU/waitcnt-flat.ll
+++ test/CodeGen/AMDGPU/waitcnt-flat.ll
@@ -5,12 +5,52 @@
; operand, this test is not broken. It just means it is no longer testing
; for the original bug.
-; GCN: {{^}}test:
+; GCN-LABEL: {{^}}global_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_load_dword
+
+; Test pointer problem
; XGCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]]
; XGCN: s_waitcnt vmcnt(0) lgkmcnt(0)
; XGCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}]
-define void @test(i32 addrspace(1)* %out, i32 %in) {
+define void @global_test(i32 addrspace(1)* %out, i32 %in) {
store volatile i32 0, i32 addrspace(1)* %out
%val = load volatile i32, i32 addrspace(1)* %out
ret void
}
+
+; GCN-LABEL: {{^}}flat_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword
+define void @flat_test(i32 addrspace(4)* %out, i32 %in) {
+ store volatile i32 0, i32 addrspace(4)* %out
+ %val = load volatile i32, i32 addrspace(4)* %out
+ ret void
+}
+
+; If the store is not through a generic pointer, the lgkmcnt is not
+; needed.
+
+; GCN-LABEL: {{^}}global_flat_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_load_dword
+define void @global_flat_test(i32 addrspace(1)* %out, i32 %in) {
+ store volatile i32 0, i32 addrspace(1)* %out
+ %out.cast = addrspacecast i32 addrspace(1)* %out to i32 addrspace(4)*
+ %val = load volatile i32, i32 addrspace(4)* %out.cast
+ ret void
+}
+
+; GCN-LABEL: {{^}}flat_global_test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}],
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; GCN: flat_load_dword
+define void @flat_global_test(i32 addrspace(1)* %out, i32 %in) {
+ %out.cast = addrspacecast i32 addrspace(1)* %out to i32 addrspace(4)*
+ store volatile i32 0, i32 addrspace(4)* %out.cast
+ %val = load volatile i32, i32 addrspace(1)* %out
+ ret void
+}
Index: lib/Target/AMDGPU/SIInsertWaits.cpp
===================================================================
--- lib/Target/AMDGPU/SIInsertWaits.cpp
+++ lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -179,6 +179,16 @@
return WaitedOn.Named.LGKM != LastIssued.Named.LGKM;
}
+static bool hasGlobalMemOperand(const MachineInstr &MI) {
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ unsigned AS = MMO->getAddrSpace();
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
+}
+
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
uint64_t TSFlags = MI.getDesc().TSFlags;
Counters Result = { { 0, 0, 0 } };
@@ -208,6 +218,11 @@
// XXX - What is the right value?
Result.Named.LGKM = 1;
}
+ } else if (TII->isFLAT(MI)) {
+ // If we know the pointer is not accessing a flat address, we don't need
+ // to wait for lgkm.
+ if (!hasGlobalMemOperand(MI))
+ Result.Named.LGKM = 1;
} else {
// DS
Result.Named.LGKM = 1;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20556.58192.patch
Type: text/x-patch
Size: 3178 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160524/0c103e9e/attachment.bin>
More information about the llvm-commits
mailing list