[llvm] [AMDGPU] Update EXECZ retention in SIPreEmitPeephole for GFX11/12 (PR #97676)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 3 22:15:35 PDT 2024


https://github.com/perlfu created https://github.com/llvm/llvm-project/pull/97676

The check to maintain EXECZ branches only checks S_WAITCNT.
Add handling for new waitcnt instructions in GFX11 and GFX12.
Also add code to retain uniform jumps over barrier instructions.

>From 60ca22033dfb50696cce9fc954ea021061752ff5 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Thu, 4 Jul 2024 14:10:33 +0900
Subject: [PATCH] [AMDGPU] Update EXECZ retention in SIPreEmitPeephole for
 GFX11/12

The check to maintain EXECZ branches only checks S_WAITCNT.
Add handling for new waitcnt instructions in GFX11 and GFX12.
Also add code to retain uniform jumps over barrier instructions.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |  31 +
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp  |   6 +-
 .../CodeGen/AMDGPU/insert-skips-gfx11.mir     | 216 +++++++
 .../CodeGen/AMDGPU/insert-skips-gfx12.mir     | 580 ++++++++++++++++++
 4 files changed, 832 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 84bb73cc9a796..6ea4eff53b257 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -936,6 +936,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
   }
 
+  bool isBarrierRelated(unsigned Opcode) const { // Any barrier-family opcode?
+    return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
+           Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
+           Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
+           Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
+           Opcode == AMDGPU::S_BARRIER_LEAVE; // Start ops, or wait/init/join/leave.
+  }
+
   static bool doesNotReadTiedSource(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
   }
@@ -967,6 +975,29 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     }
   }
 
+  bool isWaitcnt(unsigned Opcode) const { // Is Opcode one of the wait opcodes?
+    switch (getNonSoftWaitcntOpcode(Opcode)) { // Match on the helper's result.
+    case AMDGPU::S_WAITCNT: // S_WAITCNT and its S_WAITCNT_* variants.
+    case AMDGPU::S_WAITCNT_VSCNT:
+    case AMDGPU::S_WAITCNT_VMCNT:
+    case AMDGPU::S_WAITCNT_EXPCNT:
+    case AMDGPU::S_WAITCNT_LGKMCNT:
+    case AMDGPU::S_WAIT_LOADCNT: // S_WAIT_* spellings, incl. combined *_DSCNT.
+    case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+    case AMDGPU::S_WAIT_STORECNT:
+    case AMDGPU::S_WAIT_STORECNT_DSCNT:
+    case AMDGPU::S_WAIT_SAMPLECNT:
+    case AMDGPU::S_WAIT_BVHCNT:
+    case AMDGPU::S_WAIT_EXPCNT:
+    case AMDGPU::S_WAIT_DSCNT:
+    case AMDGPU::S_WAIT_KMCNT:
+    case AMDGPU::S_WAIT_IDLE:
+      return true; // Every opcode listed above is treated as a wait.
+    default:
+      return false; // Anything else is not a wait.
+    }
+  }
+
   bool isVGPRCopy(const MachineInstr &MI) const {
     assert(isCopyInstr(MI));
     Register Dest = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 875bccb208c84..3cf7395e1e4c6 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -328,7 +328,11 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
 
       // These instructions are potentially expensive even if EXEC = 0.
       if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
-          TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
+          TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
+        return true;
+
+      // Uniform bypass of barriers should be respected.
+      if (TII->isBarrierRelated(MI.getOpcode()))
         return true;
 
       ++NumInstr;
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir
new file mode 100644
index 0000000000000..ff0c9a7b9dbc0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx11.mir
@@ -0,0 +1,216 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs  %s -o - | FileCheck %s
+
+---
+name: skip_waitcnt_vscnt
+body: |
+  ; CHECK-LABEL: name: skip_waitcnt_vscnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAITCNT_VSCNT $sgpr_null, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAITCNT_VSCNT $sgpr_null, 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_expcnt
+body: |
+  ; CHECK-LABEL: name: skip_waitcnt_expcnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAITCNT_EXPCNT $sgpr_null, 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_vmcnt
+body: |
+  ; CHECK-LABEL: name: skip_waitcnt_vmcnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAITCNT_VMCNT $sgpr_null, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAITCNT_VMCNT $sgpr_null, 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_waitcnt_lgkmcnt
+body: |
+  ; CHECK-LABEL: name: skip_waitcnt_lgkmcnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAITCNT_LGKMCNT $sgpr_null, 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAITCNT_LGKMCNT $sgpr_null, 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_idle
+body: |
+  ; CHECK-LABEL: name: skip_wait_idle
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_IDLE
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_IDLE
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier
+body: |
+  ; CHECK-LABEL: name: skip_barrier
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_BARRIER
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_BARRIER
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_bvh
+body: |
+  ; CHECK-LABEL: name: skip_bvh
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+  ; CHECK-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
+    $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+
+  bb.2:
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
new file mode 100644
index 0000000000000..f0bf3d73df2cd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -0,0 +1,580 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs  %s -o - | FileCheck %s
+
+---
+name: skip_wait_loadcnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_loadcnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_LOADCNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_LOADCNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_loadcnt_dscnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_loadcnt_dscnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_LOADCNT_DSCNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_LOADCNT_DSCNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_storecnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_storecnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_STORECNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_STORECNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_storecnt_dscnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_storecnt_dscnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_STORECNT_DSCNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_STORECNT_DSCNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_samplecnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_samplecnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_SAMPLECNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_SAMPLECNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_bvhcnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_bvhcnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_BVHCNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_BVHCNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_expcnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_expcnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_EXPCNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_EXPCNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_dscnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_dscnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_DSCNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_DSCNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_kmcnt
+body: |
+  ; CHECK-LABEL: name: skip_wait_kmcnt
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_KMCNT 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_KMCNT 0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_wait_idle
+body: |
+  ; CHECK-LABEL: name: skip_wait_idle
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_WAIT_IDLE
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_WAIT_IDLE
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_imm
+body: |
+  ; CHECK-LABEL: name: skip_barrier_signal_imm
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_BARRIER_SIGNAL_IMM -1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_BARRIER_SIGNAL_IMM -1
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_imm
+body: |
+  ; CHECK-LABEL: name: skip_barrier_signal_isfirst_imm
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_m0
+body: |
+  ; CHECK-LABEL: name: skip_barrier_signal_m0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   $m0 = S_MOV_B32 -1
+  ; CHECK-NEXT:   S_BARRIER_SIGNAL_M0 implicit $m0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    $m0 = S_MOV_B32 -1
+    S_BARRIER_SIGNAL_M0 implicit $m0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_m0
+body: |
+  ; CHECK-LABEL: name: skip_barrier_signal_isfirst_m0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   $m0 = S_MOV_B32 -1
+  ; CHECK-NEXT:   S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    $m0 = S_MOV_B32 -1
+    S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_wait
+body: |
+  ; CHECK-LABEL: name: skip_barrier_wait
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_BARRIER_WAIT -1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_BARRIER_WAIT -1
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_imm
+body: |
+  ; CHECK-LABEL: name: skip_barrier_init_imm
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   $m0 = S_MOV_B32 -1
+  ; CHECK-NEXT:   S_BARRIER_INIT_IMM -1, implicit $m0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    $m0 = S_MOV_B32 -1
+    S_BARRIER_INIT_IMM -1, implicit $m0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_m0
+body: |
+  ; CHECK-LABEL: name: skip_barrier_init_m0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   $m0 = S_MOV_B32 -1
+  ; CHECK-NEXT:   S_BARRIER_INIT_M0 implicit $m0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    $m0 = S_MOV_B32 -1
+    S_BARRIER_INIT_M0 implicit $m0
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_join_imm
+body: |
+  ; CHECK-LABEL: name: skip_barrier_join_imm
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_BARRIER_JOIN_IMM -1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_BARRIER_JOIN_IMM -1
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+---
+name: skip_barrier_leave
+body: |
+  ; CHECK-LABEL: name: skip_barrier_leave
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   V_NOP_e32 implicit $exec
+  ; CHECK-NEXT:   S_BARRIER_LEAVE implicit-def $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1, %bb.2
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+  bb.1:
+    successors: %bb.2
+    V_NOP_e32 implicit $exec
+    S_BARRIER_LEAVE implicit-def $scc
+
+  bb.2:
+    S_ENDPGM 0
+...



More information about the llvm-commits mailing list