[llvm] [AMDGPU] Update hasUnwantedEffectsWhenEXECEmpty (PR #97982)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 7 19:58:28 PDT 2024
https://github.com/perlfu created https://github.com/llvm/llvm-project/pull/97982
Add barriers and s_wait_event to hasUnwantedEffectsWhenEXECEmpty.
Add a comment documenting the current expected use of the function.
From e072f1f8da7031f58a3d9918659d3fa1c68af55f Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Thu, 4 Jul 2024 14:10:33 +0900
Subject: [PATCH 1/2] [AMDGPU] Update EXECZ retention in SIPreEmitPeephole for GFX10/12
The check to maintain EXECZ branches only checks S_WAITCNT.
Add handling for new waitcnt instructions in GFX10 and GFX12.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 23 ++
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 2 +-
.../CodeGen/AMDGPU/insert-skips-gfx10.mir | 186 +++++++++++
.../CodeGen/AMDGPU/insert-skips-gfx12.mir | 302 ++++++++++++++++++
4 files changed, 512 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1e2b687854c77a..b723deb9543cdc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -967,6 +967,29 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
}
}
+ bool isWaitcnt(unsigned Opcode) const {
+ switch (getNonSoftWaitcntOpcode(Opcode)) {
+ case AMDGPU::S_WAITCNT:
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ case AMDGPU::S_WAIT_LOADCNT:
+ case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+ case AMDGPU::S_WAIT_STORECNT:
+ case AMDGPU::S_WAIT_STORECNT_DSCNT:
+ case AMDGPU::S_WAIT_SAMPLECNT:
+ case AMDGPU::S_WAIT_BVHCNT:
+ case AMDGPU::S_WAIT_EXPCNT:
+ case AMDGPU::S_WAIT_DSCNT:
+ case AMDGPU::S_WAIT_KMCNT:
+ case AMDGPU::S_WAIT_IDLE:
+ return true;
+ default:
+ return false;
+ }
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(isCopyInstr(MI));
Register Dest = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 875bccb208c846..1334029544f999 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -328,7 +328,7 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
- TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
+ TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
return true;
++NumInstr;
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
new file mode 100644
index 00000000000000..1d3132dbe2af2f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
@@ -0,0 +1,186 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: skip_waitcnt_vscnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_vscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_VSCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_VSCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_expcnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_expcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_EXPCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_vmcnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_vmcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_VMCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_VMCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_lgkmcnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_lgkmcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_LGKMCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_LGKMCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_idle
+body: |
+ ; CHECK-LABEL: name: skip_wait_idle
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_IDLE
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_IDLE
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_bvh
+body: |
+ ; CHECK-LABEL: name: skip_bvh
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+
+ bb.2:
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
new file mode 100644
index 00000000000000..c0b839d218a95f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -0,0 +1,302 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: skip_wait_loadcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_loadcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_LOADCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_LOADCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_loadcnt_dscnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_loadcnt_dscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_LOADCNT_DSCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_storecnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_storecnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_STORECNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_STORECNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_storecnt_dscnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_storecnt_dscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_STORECNT_DSCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_STORECNT_DSCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_samplecnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_samplecnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_SAMPLECNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_SAMPLECNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_bvhcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_bvhcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_BVHCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_BVHCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_expcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_expcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_EXPCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_EXPCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_dscnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_dscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_DSCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_DSCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_kmcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_kmcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_KMCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_KMCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_idle
+body: |
+ ; CHECK-LABEL: name: skip_wait_idle
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_IDLE
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_IDLE
+
+ bb.2:
+ S_ENDPGM 0
+...
From 77103196d63c905c45172495b25be1300f11a4d8 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Mon, 8 Jul 2024 11:22:48 +0900
Subject: [PATCH 2/2] [AMDGPU] Update hasUnwantedEffectsWhenEXECEmpty
Add barriers and s_wait_event to hasUnwantedEffectsWhenEXECEmpty.
Add a comment documenting the current expected use of the function.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 +
.../CodeGen/AMDGPU/insert-skips-gfx10.mir | 30 ++
.../CodeGen/AMDGPU/insert-skips-gfx12.mir | 308 ++++++++++++++++++
4 files changed, 359 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index cc1b9ac0c9ecda..a2cb3834643227 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4118,6 +4118,13 @@ bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
}
bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+ // This function is used to determine if an instruction can be safely
+ // executed under EXECZ without hardware error, indeterminate results,
+ // and/or visible effects on future vector execution or outside the shader.
+ // Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
+ // used in removing branches over short EXECZ sequences.
+ // As such it embeds certain assumptions which may not apply in every case
+ // of EXECZ execution.
unsigned Opcode = MI.getOpcode();
if (MI.mayStore() && isSMRD(MI))
@@ -4136,12 +4143,17 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
isEXP(Opcode) ||
Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
- Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
+ Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER ||
+ Opcode == AMDGPU::S_WAIT_EVENT)
return true;
if (MI.isCall() || MI.isInlineAsm())
return true; // conservative assumption
+ // Assume that barrier interactions are only intended with active lanes.
+ if (isBarrierRelated(Opcode))
+ return true;
+
// A mode change is a scalar operation that influences vector instructions.
if (modifiesModeRegister(MI))
return true;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b723deb9543cdc..bee24b3a7a91b3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -936,6 +936,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
}
+ bool isBarrierRelated(unsigned Opcode) const {
+ return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
+ Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
+ Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
+ Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
+ Opcode == AMDGPU::S_BARRIER_LEAVE;
+ }
+
static bool doesNotReadTiedSource(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
}
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
index 1d3132dbe2af2f..b4ed3cafbacb5f 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
@@ -184,3 +184,33 @@ body: |
bb.2:
S_ENDPGM 0
...
+
+---
+name: skip_barrier
+body: |
+ ; CHECK-LABEL: name: skip_barrier
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER
+
+ bb.2:
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
index c0b839d218a95f..2d092974ac566f 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -300,3 +300,311 @@ body: |
bb.2:
S_ENDPGM 0
...
+
+---
+name: skip_wait_event
+body: |
+ ; CHECK-LABEL: name: skip_wait_event
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_EVENT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_EVENT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_SIGNAL_IMM -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_isfirst_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_M0 implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_SIGNAL_M0 implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_isfirst_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_wait
+body: |
+ ; CHECK-LABEL: name: skip_barrier_wait
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_WAIT -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_WAIT -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_init_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_INIT_IMM -1, implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_INIT_IMM -1, implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_init_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_INIT_M0 implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_INIT_M0 implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_join_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_join_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_JOIN_IMM -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_JOIN_IMM -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_leave
+body: |
+ ; CHECK-LABEL: name: skip_barrier_leave
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_LEAVE implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_LEAVE implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list