[llvm] [AMDGPU][SIInsertWaitcnts] Set initial state for VS_CNT in non-kernel functions (PR #75436)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 13 23:31:47 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Pierre van Houtryve (Pierre-vh)

<details>
<summary>Changes</summary>

Split from #72830.

---

The patch is 27.74 KiB and has been truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/75436.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+10) 
- (modified) llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll (-28) 
- (modified) llvm/test/CodeGen/AMDGPU/release-vgprs.mir (+99-226) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c16583f6a7f9ac..dfe67f4c189540 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -292,6 +292,11 @@ class WaitcntBrackets {
     VgprVmemTypes[GprNo] = 0;
   }
 
+  void setNonKernelFunctionInitialState() {
+    setScoreUB(VS_CNT, getWaitCountMax(VS_CNT));
+    PendingEvents |= WaitEventMaskForInst[VS_CNT];
+  }
+
   void print(raw_ostream &);
   void dump() { print(dbgs()); }
 
@@ -1865,6 +1870,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
       ;
     BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
 
+    auto NonKernelInitialState =
+        std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
+    NonKernelInitialState->setNonKernelFunctionInitialState();
+    BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState);
+
     Modified = true;
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
index 1ccd31e97a2aba..36ddd286155a34 100644
--- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
@@ -55,6 +55,7 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
 ; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-BACKOFF-NEXT:    flat_load_b32 v0, v[0:1]
 ; GFX11-BACKOFF-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-BACKOFF-NEXT:    s_barrier
 ; GFX11-BACKOFF-NEXT:    flat_store_b32 v[2:3], v0
 ; GFX11-BACKOFF-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
index 6fd6d6e2e31a1c..65b70587fa0ace 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
@@ -17,8 +17,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v10
 ; GFX11-NEXT:    s_not_b32 exec_lo, exec_lo
 ; GFX11-NEXT:    global_store_b32 v[8:9], v0, off
-; GFX11-NEXT:    s_nop 0
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: set_inactive_chain_arg:
@@ -39,8 +37,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
 ; GFX11_W64-NEXT:    v_mov_b32_e32 v0, v10
 ; GFX11_W64-NEXT:    s_not_b64 exec, exec
 ; GFX11_W64-NEXT:    global_store_b32 v[8:9], v0, off
-; GFX11_W64-NEXT:    s_nop 0
-; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W64-NEXT:    s_endpgm
 ;
 ; GFX10_W64-LABEL: set_inactive_chain_arg:
@@ -68,8 +64,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
 ; GFX11-NEXT:    v_mov_b32_e32 v1, v11
 ; GFX11-NEXT:    s_not_b32 exec_lo, exec_lo
 ; GFX11-NEXT:    global_store_b64 v[8:9], v[0:1], off
-; GFX11-NEXT:    s_nop 0
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: set_inactive_chain_arg_64:
@@ -94,8 +88,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
 ; GFX11_W64-NEXT:    v_mov_b32_e32 v1, v11
 ; GFX11_W64-NEXT:    s_not_b64 exec, exec
 ; GFX11_W64-NEXT:    global_store_b64 v[8:9], v[0:1], off
-; GFX11_W64-NEXT:    s_nop 0
-; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W64-NEXT:    s_endpgm
 ;
 ; GFX10_W64-LABEL: set_inactive_chain_arg_64:
@@ -133,8 +125,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX11-NEXT:    global_store_b32 v[8:9], v2, off
-; GFX11-NEXT:    s_nop 0
-; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: set_inactive_chain_arg_dpp:
@@ -174,8 +164,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
 ; GFX11_W64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11_W64-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX11_W64-NEXT:    global_store_b32 v[8:9], v2, off
-; GFX11_W64-NEXT:    s_nop 0
-; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11_W64-NEXT:    s_endpgm
 ;
 ; GFX10_W64-LABEL: set_inactive_chain_arg_dpp:
@@ -233,8 +221,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
 ; GISEL11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL11-NEXT:    v_mov_b32_e32 v0, v12
 ; GISEL11-NEXT:    global_store_b32 v[41:42], v0, off
-; GISEL11-NEXT:    s_nop 0
-; GISEL11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GISEL11-NEXT:    s_endpgm
 ;
 ; DAGISEL11-LABEL: set_inactive_chain_arg_call:
@@ -265,8 +251,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
 ; DAGISEL11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; DAGISEL11-NEXT:    v_mov_b32_e32 v0, v12
 ; DAGISEL11-NEXT:    global_store_b32 v[41:42], v0, off
-; DAGISEL11-NEXT:    s_nop 0
-; DAGISEL11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; DAGISEL11-NEXT:    s_endpgm
 ;
 ; GISEL10-LABEL: set_inactive_chain_arg_call:
@@ -380,8 +364,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
 ; GISEL11_W64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL11_W64-NEXT:    v_mov_b32_e32 v0, v12
 ; GISEL11_W64-NEXT:    global_store_b32 v[41:42], v0, off
-; GISEL11_W64-NEXT:    s_nop 0
-; GISEL11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GISEL11_W64-NEXT:    s_endpgm
 ;
 ; DAGISEL11_W64-LABEL: set_inactive_chain_arg_call:
@@ -419,8 +401,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
 ; DAGISEL11_W64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; DAGISEL11_W64-NEXT:    v_mov_b32_e32 v0, v12
 ; DAGISEL11_W64-NEXT:    global_store_b32 v[41:42], v0, off
-; DAGISEL11_W64-NEXT:    s_nop 0
-; DAGISEL11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; DAGISEL11_W64-NEXT:    s_endpgm
 ;
 ; GISEL10_W64-LABEL: set_inactive_chain_arg_call:
@@ -538,8 +518,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
 ; GISEL11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL11-NEXT:    v_mov_b32_e32 v0, v12
 ; GISEL11-NEXT:    global_store_b32 v[41:42], v0, off
-; GISEL11-NEXT:    s_nop 0
-; GISEL11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GISEL11-NEXT:    s_endpgm
 ;
 ; DAGISEL11-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -570,8 +548,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
 ; DAGISEL11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; DAGISEL11-NEXT:    v_mov_b32_e32 v0, v12
 ; DAGISEL11-NEXT:    global_store_b32 v[41:42], v0, off
-; DAGISEL11-NEXT:    s_nop 0
-; DAGISEL11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; DAGISEL11-NEXT:    s_endpgm
 ;
 ; GISEL10-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -685,8 +661,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
 ; GISEL11_W64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GISEL11_W64-NEXT:    v_mov_b32_e32 v0, v12
 ; GISEL11_W64-NEXT:    global_store_b32 v[41:42], v0, off
-; GISEL11_W64-NEXT:    s_nop 0
-; GISEL11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GISEL11_W64-NEXT:    s_endpgm
 ;
 ; DAGISEL11_W64-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -724,8 +698,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
 ; DAGISEL11_W64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; DAGISEL11_W64-NEXT:    v_mov_b32_e32 v0, v12
 ; DAGISEL11_W64-NEXT:    global_store_b32 v[41:42], v0, off
-; DAGISEL11_W64-NEXT:    s_nop 0
-; DAGISEL11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; DAGISEL11_W64-NEXT:    s_endpgm
 ;
 ; GISEL10_W64-LABEL: set_inactive_chain_arg_last_vgpr:
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
index 3a879e818af797..e57fc0311bd3c6 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,OPT
-# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,NOOPT
+# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s
 
 --- |
   define amdgpu_ps void @tbuffer_store1() { ret void }
@@ -28,17 +28,10 @@
 name:            tbuffer_store1
 body:             |
   bb.0:
-    ; OPT-LABEL: name: tbuffer_store1
-    ; OPT: S_WAITCNT 0
-    ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
-    ; OPT-NEXT: S_NOP 0
-    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
-    ; OPT-NEXT: S_ENDPGM 0
-    ;
-    ; NOOPT-LABEL: name: tbuffer_store1
-    ; NOOPT: S_WAITCNT 0
-    ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
-    ; NOOPT-NEXT: S_ENDPGM 0
+    ; CHECK-LABEL: name: tbuffer_store1
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
     TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
@@ -47,17 +40,10 @@ body:             |
 name:            tbuffer_store2
 body:             |
   bb.0:
-    ; OPT-LABEL: name: tbuffer_store2
-    ; OPT: S_WAITCNT 0
-    ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
-    ; OPT-NEXT: S_NOP 0
-    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
-    ; OPT-NEXT: S_ENDPGM 0
-    ;
-    ; NOOPT-LABEL: name: tbuffer_store2
-    ; NOOPT: S_WAITCNT 0
-    ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
-    ; NOOPT-NEXT: S_ENDPGM 0
+    ; CHECK-LABEL: name: tbuffer_store2
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+    ; CHECK-NEXT: S_ENDPGM 0
     TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
     S_ENDPGM 0
 ...
@@ -78,19 +64,11 @@ body:             |
 name:            global_store
 body:             |
   bb.0:
-    ; OPT-LABEL: name: global_store
-    ; OPT: S_WAITCNT 0
-    ; OPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
-    ; OPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
-    ; OPT-NEXT: S_NOP 0
-    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
-    ; OPT-NEXT: S_ENDPGM 0
-    ;
-    ; NOOPT-LABEL: name: global_store
-    ; NOOPT: S_WAITCNT 0
-    ; NOOPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
-    ; NOOPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
-    ; NOOPT-NEXT: S_ENDPGM 0
+    ; CHECK-LABEL: name: global_store
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+    ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+    ; CHECK-NEXT: S_ENDPGM 0
     GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
     S_WAITCNT_VSCNT undef $sgpr_null, 0
     S_ENDPGM 0
@@ -100,17 +78,10 @@ body:             |
 name:            buffer_store_format
 body:             |
   bb.0:
-    ; OPT-LABEL: name: buffer_store_format
-    ; OPT: S_WAITCNT 0
-    ; OPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
-    ; OPT-NEXT: S_NOP 0
-    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
-    ; OPT-NEXT: S_ENDPGM 0
-    ;
-    ; NOOPT-LABEL: name: buffer_store_format
-    ; NOOPT: S_WAITCNT 0
-    ; NOOPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
-    ; NOOPT-NEXT: S_ENDPGM 0
+    ; CHECK-LABEL: name: buffer_store_format
+    ; CHECK: S_WAITCNT 0
+    ; CHECK-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
     BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
     S_ENDPGM 0
 ...
@@ -137,23 +108,13 @@ body:             |
   bb.0:
     liveins: $vgpr0, $sgpr0_sgpr1
 
-    ; OPT-LABEL: name: global_store_dword
-    ; OPT: liveins: $vgpr0, $sgpr0_sgpr1
-    ; OPT-NEXT: {{  $}}
-    ; OPT-NEXT: S_WAITCNT 0
-    ; OPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
-    ; OPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
-    ; OPT-NEXT: S_NOP 0
-    ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
-    ; OPT-NEXT: S_ENDPGM 0
-    ;
-    ; NOOPT-LABEL: name: global_store_dword
-    ; NOOPT: liveins: $vgpr0, $sgpr0_sgpr1
-    ; NOOPT-NEXT: {{  $}}
-    ; NOOPT-NEXT: S_WAITCNT 0
-    ; NOOPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
-    ; NOOPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
-    ; NOOPT-NEXT: S_ENDPGM 0
+    ; CHECK-LABEL: name: global_store_dword
+    ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_WAITCNT 0
+    ; CHECK-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
+    ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
     renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
     GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
     S_ENDPGM 0
@@ -206,45 +167,24 @@ body:             |
 ---
 name:            multiple_basic_blocks2
 body:             |
-  ; OPT-LABEL: name: multiple_basic_blocks2
-  ; OPT: bb.0:
-  ; OPT-NEXT:   successors: %bb.2(0x80000000)
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT:   S_WAITCNT 0
-  ; OPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
-  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; OPT-NEXT:   S_BRANCH %bb.2
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT: bb.1:
-  ; OPT-NEXT:   successors: %bb.2(0x80000000)
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; OPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
-  ; OPT-NEXT:   S_BRANCH %bb.2
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT: bb.2:
-  ; OPT-NEXT:   S_NOP 0
-  ; OPT-NEXT:   S_SENDMSG 3, implicit $exec, implicit $m0
-  ; OPT-NEXT:   S_ENDPGM 0
-  ;
-  ; NOOPT-LABEL: name: multiple_basic_blocks2
-  ; NOOPT: bb.0:
-  ; NOOPT-NEXT:   successors: %bb.2(0x80000000)
-  ; NOOPT-NEXT: {{  $}}
-  ; NOOPT-NEXT:   S_WAITCNT 0
-  ; NOOPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
-  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; NOOPT-NEXT:   S_BRANCH %bb.2
-  ; NOOPT-NEXT: {{  $}}
-  ; NOOPT-NEXT: bb.1:
-  ; NOOPT-NEXT:   successors: %bb.2(0x80000000)
-  ; NOOPT-NEXT: {{  $}}
-  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; NOOPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
-  ; NOOPT-NEXT:   S_BRANCH %bb.2
-  ; NOOPT-NEXT: {{  $}}
-  ; NOOPT-NEXT: bb.2:
-  ; NOOPT-NEXT:   S_ENDPGM 0
+  ; CHECK-LABEL: name: multiple_basic_blocks2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_WAITCNT 0
+  ; CHECK-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; CHECK-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
   bb.0:
     successors: %bb.2
 
@@ -268,65 +208,34 @@ body:             |
 ---
 name:            multiple_basic_blocks3
 body:             |
-  ; OPT-LABEL: name: multiple_basic_blocks3
-  ; OPT: bb.0:
-  ; OPT-NEXT:   successors: %bb.2(0x80000000)
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT:   S_WAITCNT 0
-  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; OPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
-  ; OPT-NEXT:   S_BRANCH %bb.2
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT: bb.1:
-  ; OPT-NEXT:   successors: %bb.2(0x80000000)
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; OPT-NEXT:   S_BRANCH %bb.2
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT: bb.2:
-  ; OPT-NEXT:   successors: %bb.4(0x80000000)
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT:   S_BRANCH %bb.4
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT: bb.3:
-  ; OPT-NEXT:   successors: %bb.4(0x80000000)
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; OPT-NEXT:   S_BRANCH %bb.4
-  ; OPT-NEXT: {{  $}}
-  ; OPT-NEXT: bb.4:
-  ; OPT-NEXT:   S_NOP 0
-  ; OPT-NEXT:   S_SENDMSG 3, implicit $exec, implicit $m0
-  ; OPT-NEXT:   S_ENDPGM 0
-  ;
-  ; NOOPT-LABEL: name: multiple_basic_blocks3
-  ; NOOPT: bb.0:
-  ; NOOPT-NEXT:   successors: %bb.2(0x80000000)
-  ; NOOPT-NEXT: {{  $}}
-  ; NOOPT-NEXT:   S_WAITCNT 0
-  ; NOOPT-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
-  ; NOOPT-NEXT:   TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
-  ; NOOPT-NEXT:   S_BRANCH %bb.2
-  ; NOOPT-NEXT: {{  $}}
-  ; NO...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/75436


More information about the llvm-commits mailing list