[PATCH] D146829: [AMDGPU] Remove unnecessary waitcnts
Jessica Del via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 24 10:39:57 PDT 2023
OutOfCache created this revision.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard, yaxunl, jvesely, kzhuravl, arsenm.
Herald added a project: All.
OutOfCache requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.
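Some ds_* instructions (e.g. ds_swizzle_b32 and ds_bpermute_b32) do not access LDS memory.
Therefore, the `s_waitcnt lgkmcnt` instructions that wait on their results can be removed.
To achieve this, SIInsertWaitcnts now only records a pending LGKM event for a DS instruction if Inst.mayLoadOrStore() is true.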
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D146829
Files:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir
llvm/test/CodeGen/AMDGPU/wqm.ll
Index: llvm/test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/wqm.ll
+++ llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -2216,7 +2216,6 @@
; GFX9-W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2)
; GFX9-W64-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2
; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v0
; GFX9-W64-NEXT: .LBB36_2: ; %ENDIF
@@ -2246,7 +2245,6 @@
; GFX10-W32-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2)
; GFX10-W32-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2
; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v0
; GFX10-W32-NEXT: .LBB36_2: ; %ENDIF
@@ -2753,7 +2751,6 @@
; GFX9-W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2)
; GFX9-W64-NEXT: s_mov_b64 exec, s[0:1]
-; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2
; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v0
; GFX9-W64-NEXT: .LBB45_2: ; %ENDIF
@@ -2783,7 +2780,6 @@
; GFX10-W32-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2)
; GFX10-W32-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2
; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v0
; GFX10-W32-NEXT: .LBB45_2: ; %ENDIF
@@ -2831,7 +2827,6 @@
; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
; GFX9-W64-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX9-W64-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2)
-; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-W64-NEXT: v_mov_b32_e32 v0, v2
; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v0, v0
; GFX9-W64-NEXT: .LBB46_2: ; %ENDIF
@@ -2855,7 +2850,6 @@
; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
; GFX10-W32-NEXT: v_cvt_i32_f32_e32 v2, v2
; GFX10-W32-NEXT: ds_swizzle_b32 v2, v2 offset:swizzle(SWAP,2)
-; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-W32-NEXT: v_mov_b32_e32 v0, v2
; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v0, v0
; GFX10-W32-NEXT: .LBB46_2: ; %ENDIF
Index: llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir
+++ llvm/test/CodeGen/AMDGPU/waitcnt-permute.mir
@@ -3,7 +3,7 @@
...
# CHECK-LABEL: name: waitcnt-permute{{$}}
# CHECK: DS_BPERMUTE_B32
-# CHECK-NEXT: S_WAITCNT 127
+# CHECK-NOT: S_WAITCNT 127
name: waitcnt-permute
liveins:
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
@@ -29,7 +29,7 @@
; CHECK-LABEL: {{^}}ds_bpermute_add_shl:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
-; CHECK: s_waitcnt lgkmcnt
+; CHECK-NOT: s_waitcnt lgkmcnt
define void @ds_bpermute_add_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
%index = add i32 %base_index, 1
%byte_index = shl i32 %index, 2
@@ -40,7 +40,7 @@
; CHECK-LABEL: {{^}}ds_bpermute_or_shl:
; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
-; CHECK: s_waitcnt lgkmcnt
+; CHECK-NOT: s_waitcnt lgkmcnt
define void @ds_bpermute_or_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
%masked = and i32 %base_index, 62
%index = or i32 %masked, 1
Index: llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1390,7 +1390,7 @@
// instruction, update the upper-bound of the appropriate counter's
// bracket and the destination operand scores.
// TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
- if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
+ if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst) && Inst.mayLoadOrStore()) {
if (TII->isAlwaysGDS(Inst.getOpcode()) ||
TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D146829.508156.patch
Type: text/x-patch
Size: 4381 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230324/e39b03f7/attachment.bin>
More information about the llvm-commits
mailing list