[llvm] [AMDGPU] Update EXECZ retention in SIPreEmitPeephole for GFX11/12 (PR #97676)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 22:15:35 PDT 2024
https://github.com/perlfu created https://github.com/llvm/llvm-project/pull/97676
The check to maintain EXECZ branches only checks S_WAITCNT.
Add handling for new waitcnt instructions in GFX11 and GFX12.
Also add code to retain uniform jumps over barrier instructions.
>From 60ca22033dfb50696cce9fc954ea021061752ff5 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Thu, 4 Jul 2024 14:10:33 +0900
Subject: [PATCH] [AMDGPU] Update EXECZ retention in SIPreEmitPeephole for
GFX11/12
The check to maintain EXECZ branches only checks S_WAITCNT.
Add handling for new waitcnt instructions in GFX11 and GFX12.
Also add code to retain uniform jumps over barrier instructions.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 31 +
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp | 6 +-
.../CodeGen/AMDGPU/insert-skips-gfx11.mir | 216 +++++++
.../CodeGen/AMDGPU/insert-skips-gfx12.mir | 580 ++++++++++++++++++
4 files changed, 832 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 84bb73cc9a796..6ea4eff53b257 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -936,6 +936,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
}
+ bool isBarrierRelated(unsigned Opcode) const {
+ return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
+ Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
+ Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
+ Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
+ Opcode == AMDGPU::S_BARRIER_LEAVE;
+ }
+
static bool doesNotReadTiedSource(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
}
@@ -967,6 +975,29 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
}
}
+ bool isWaitcnt(unsigned Opcode) const {
+ switch (getNonSoftWaitcntOpcode(Opcode)) {
+ case AMDGPU::S_WAITCNT:
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ case AMDGPU::S_WAIT_LOADCNT:
+ case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+ case AMDGPU::S_WAIT_STORECNT:
+ case AMDGPU::S_WAIT_STORECNT_DSCNT:
+ case AMDGPU::S_WAIT_SAMPLECNT:
+ case AMDGPU::S_WAIT_BVHCNT:
+ case AMDGPU::S_WAIT_EXPCNT:
+ case AMDGPU::S_WAIT_DSCNT:
+ case AMDGPU::S_WAIT_KMCNT:
+ case AMDGPU::S_WAIT_IDLE:
+ return true;
+ default:
+ return false;
+ }
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(isCopyInstr(MI));
Register Dest = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 875bccb208c84..3cf7395e1e4c6 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -328,7 +328,11 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
- TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
+ TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
+ return true;
+
+ // Uniform bypass of barriers should be respected.
+ if (TII->isBarrierRelated(MI.getOpcode()))
return true;
++NumInstr;
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir
new file mode 100644
index 0000000000000..ff0c9a7b9dbc0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir
@@ -0,0 +1,216 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: skip_waitcnt_vscnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_vscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_VSCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_VSCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_expcnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_expcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_EXPCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_vmcnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_vmcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_VMCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_VMCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_lgkmcnt
+body: |
+ ; CHECK-LABEL: name: skip_waitcnt_lgkmcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_LGKMCNT $sgpr_null, 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAITCNT_LGKMCNT $sgpr_null, 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_idle
+body: |
+ ; CHECK-LABEL: name: skip_wait_idle
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_IDLE
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_IDLE
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier
+body: |
+ ; CHECK-LABEL: name: skip_barrier
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_bvh
+body: |
+ ; CHECK-LABEL: name: skip_bvh
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
+ $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+ $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+
+ bb.2:
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
new file mode 100644
index 0000000000000..f0bf3d73df2cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -0,0 +1,580 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: skip_wait_loadcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_loadcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_LOADCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_LOADCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_loadcnt_dscnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_loadcnt_dscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_LOADCNT_DSCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_storecnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_storecnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_STORECNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_STORECNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_storecnt_dscnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_storecnt_dscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_STORECNT_DSCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_STORECNT_DSCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_samplecnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_samplecnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_SAMPLECNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_SAMPLECNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_bvhcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_bvhcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_BVHCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_BVHCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_expcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_expcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_EXPCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_EXPCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_dscnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_dscnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_DSCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_DSCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_kmcnt
+body: |
+ ; CHECK-LABEL: name: skip_wait_kmcnt
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_KMCNT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_KMCNT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_wait_idle
+body: |
+ ; CHECK-LABEL: name: skip_wait_idle
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_IDLE
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_IDLE
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_SIGNAL_IMM -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_isfirst_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_M0 implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_SIGNAL_M0 implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_isfirst_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_wait
+body: |
+ ; CHECK-LABEL: name: skip_barrier_wait
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_WAIT -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_WAIT -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_init_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_INIT_IMM -1, implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_INIT_IMM -1, implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_init_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_INIT_M0 implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_INIT_M0 implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_join_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_join_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_JOIN_IMM -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_JOIN_IMM -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_leave
+body: |
+ ; CHECK-LABEL: name: skip_barrier_leave
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_LEAVE implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_LEAVE implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
More information about the llvm-commits
mailing list