[llvm] f1ea77f - [AMDGPU][SIInsertWaitcnts] Set initial state for VS_CNT in non-kernel functions (#75436)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 23:31:17 PST 2023
Author: Pierre van Houtryve
Date: 2023-12-15T08:31:14+01:00
New Revision: f1ea77f7be8acda2aa4b08ba27f454512a872057
URL: https://github.com/llvm/llvm-project/commit/f1ea77f7be8acda2aa4b08ba27f454512a872057
DIFF: https://github.com/llvm/llvm-project/commit/f1ea77f7be8acda2aa4b08ba27f454512a872057.diff
LOG: [AMDGPU][SIInsertWaitcnts] Set initial state for VS_CNT in non-kernel functions (#75436)
Split from #72830
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
llvm/test/CodeGen/AMDGPU/release-vgprs.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c16583f6a7f9ac..dfe67f4c189540 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -292,6 +292,11 @@ class WaitcntBrackets {
VgprVmemTypes[GprNo] = 0;
}
+ void setNonKernelFunctionInitialState() {
+ setScoreUB(VS_CNT, getWaitCountMax(VS_CNT));
+ PendingEvents |= WaitEventMaskForInst[VS_CNT];
+ }
+
void print(raw_ostream &);
void dump() { print(dbgs()); }
@@ -1865,6 +1870,11 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
+ auto NonKernelInitialState =
+ std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
+ NonKernelInitialState->setNonKernelFunctionInitialState();
+ BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState);
+
Modified = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
index 1ccd31e97a2aba..36ddd286155a34 100644
--- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
@@ -55,6 +55,7 @@ define void @back_off_barrier_no_fence(ptr %in, ptr %out) #0 {
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-BACKOFF-NEXT: flat_load_b32 v0, v[0:1]
; GFX11-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-BACKOFF-NEXT: s_barrier
; GFX11-BACKOFF-NEXT: flat_store_b32 v[2:3], v0
; GFX11-BACKOFF-NEXT: s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
index 6fd6d6e2e31a1c..65b70587fa0ace 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
@@ -17,8 +17,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; GFX11-NEXT: global_store_b32 v[8:9], v0, off
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg:
@@ -39,8 +37,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
; GFX11_W64-NEXT: s_not_b64 exec, exec
; GFX11_W64-NEXT: global_store_b32 v[8:9], v0, off
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg:
@@ -68,8 +64,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
; GFX11-NEXT: v_mov_b32_e32 v1, v11
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; GFX11-NEXT: global_store_b64 v[8:9], v[0:1], off
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg_64:
@@ -94,8 +88,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i6
; GFX11_W64-NEXT: v_mov_b32_e32 v1, v11
; GFX11_W64-NEXT: s_not_b64 exec, exec
; GFX11_W64-NEXT: global_store_b64 v[8:9], v[0:1], off
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg_64:
@@ -133,8 +125,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: global_store_b32 v[8:9], v2, off
-; GFX11-NEXT: s_nop 0
-; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg_dpp:
@@ -174,8 +164,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i
; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11_W64-NEXT: v_mov_b32_e32 v2, v1
; GFX11_W64-NEXT: global_store_b32 v[8:9], v2, off
-; GFX11_W64-NEXT: s_nop 0
-; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg_dpp:
@@ -233,8 +221,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11-NEXT: v_mov_b32_e32 v0, v12
; GISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11-NEXT: s_nop 0
-; GISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11-NEXT: s_endpgm
;
; DAGISEL11-LABEL: set_inactive_chain_arg_call:
@@ -265,8 +251,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11-NEXT: s_nop 0
-; DAGISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11-NEXT: s_endpgm
;
; GISEL10-LABEL: set_inactive_chain_arg_call:
@@ -380,8 +364,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11_W64-NEXT: s_nop 0
-; GISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11_W64-NEXT: s_endpgm
;
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_call:
@@ -419,8 +401,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out,
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11_W64-NEXT: s_nop 0
-; DAGISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11_W64-NEXT: s_endpgm
;
; GISEL10_W64-LABEL: set_inactive_chain_arg_call:
@@ -538,8 +518,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11-NEXT: v_mov_b32_e32 v0, v12
; GISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11-NEXT: s_nop 0
-; GISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11-NEXT: s_endpgm
;
; DAGISEL11-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -570,8 +548,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11-NEXT: s_nop 0
-; DAGISEL11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11-NEXT: s_endpgm
;
; GISEL10-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -685,8 +661,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; GISEL11_W64-NEXT: s_nop 0
-; GISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL11_W64-NEXT: s_endpgm
;
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_last_vgpr:
@@ -724,8 +698,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
-; DAGISEL11_W64-NEXT: s_nop 0
-; DAGISEL11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; DAGISEL11_W64-NEXT: s_endpgm
;
; GISEL10_W64-LABEL: set_inactive_chain_arg_last_vgpr:
diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
index 3a879e818af797..e57fc0311bd3c6 100644
--- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
+++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,OPT
-# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=CHECK,NOOPT
+# RUN: llc -O2 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -run-pass=si-insert-waitcnts -verify-machineinstrs -o - %s | FileCheck %s
--- |
define amdgpu_ps void @tbuffer_store1() { ret void }
@@ -28,17 +28,10 @@
name: tbuffer_store1
body: |
bb.0:
- ; OPT-LABEL: name: tbuffer_store1
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: tbuffer_store1
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: tbuffer_store1
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -47,17 +40,10 @@ body: |
name: tbuffer_store2
body: |
bb.0:
- ; OPT-LABEL: name: tbuffer_store2
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: tbuffer_store2
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: tbuffer_store2
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
+ ; CHECK-NEXT: S_ENDPGM 0
TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7)
S_ENDPGM 0
...
@@ -78,19 +64,11 @@ body: |
name: global_store
body: |
bb.0:
- ; OPT-LABEL: name: global_store
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
- ; OPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: global_store
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
- ; NOOPT-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: global_store
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
+ ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
+ ; CHECK-NEXT: S_ENDPGM 0
GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec
S_WAITCNT_VSCNT undef $sgpr_null, 0
S_ENDPGM 0
@@ -100,17 +78,10 @@ body: |
name: buffer_store_format
body: |
bb.0:
- ; OPT-LABEL: name: buffer_store_format
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: buffer_store_format
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: buffer_store_format
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@@ -137,23 +108,13 @@ body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
- ; OPT-LABEL: name: global_store_dword
- ; OPT: liveins: $vgpr0, $sgpr0_sgpr1
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 0
- ; OPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
- ; OPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: global_store_dword
- ; NOOPT: liveins: $vgpr0, $sgpr0_sgpr1
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 0
- ; NOOPT-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
- ; NOOPT-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: global_store_dword
+ ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec
GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec
S_ENDPGM 0
@@ -206,45 +167,24 @@ body: |
---
name: multiple_basic_blocks2
body: |
- ; OPT-LABEL: name: multiple_basic_blocks2
- ; OPT: bb.0:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 0
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.1:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.2:
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: multiple_basic_blocks2
- ; NOOPT: bb.0:
- ; NOOPT-NEXT: successors: %bb.2(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 0
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.1:
- ; NOOPT-NEXT: successors: %bb.2(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.2:
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: multiple_basic_blocks2
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.2
@@ -268,65 +208,34 @@ body: |
---
name: multiple_basic_blocks3
body: |
- ; OPT-LABEL: name: multiple_basic_blocks3
- ; OPT: bb.0:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 0
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.1:
- ; OPT-NEXT: successors: %bb.2(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.2:
- ; OPT-NEXT: successors: %bb.4(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_BRANCH %bb.4
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.3:
- ; OPT-NEXT: successors: %bb.4(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.4
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.4:
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: multiple_basic_blocks3
- ; NOOPT: bb.0:
- ; NOOPT-NEXT: successors: %bb.2(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 0
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.1:
- ; NOOPT-NEXT: successors: %bb.2(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.2:
- ; NOOPT-NEXT: successors: %bb.4(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_BRANCH %bb.4
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.3:
- ; NOOPT-NEXT: successors: %bb.4(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.4
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.4:
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: multiple_basic_blocks3
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.2
@@ -395,47 +304,25 @@ body: |
---
name: recursive_loop_vmem
body: |
- ; OPT-LABEL: name: recursive_loop_vmem
- ; OPT: bb.0:
- ; OPT-NEXT: successors: %bb.1(0x80000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 0
- ; OPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- ; OPT-NEXT: S_BRANCH %bb.1
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.1:
- ; OPT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: S_WAITCNT 1015
- ; OPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
- ; OPT-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- ; OPT-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- ; OPT-NEXT: S_BRANCH %bb.2
- ; OPT-NEXT: {{ $}}
- ; OPT-NEXT: bb.2:
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: recursive_loop_vmem
- ; NOOPT: bb.0:
- ; NOOPT-NEXT: successors: %bb.1(0x80000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 0
- ; NOOPT-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_BRANCH %bb.1
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.1:
- ; NOOPT-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: S_WAITCNT 1015
- ; NOOPT-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
- ; NOOPT-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
- ; NOOPT-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
- ; NOOPT-NEXT: S_BRANCH %bb.2
- ; NOOPT-NEXT: {{ $}}
- ; NOOPT-NEXT: bb.2:
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: recursive_loop_vmem
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 0
+ ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_WAITCNT 1015
+ ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec
+ ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.1
@@ -458,17 +345,10 @@ body: |
name: image_store
body: |
bb.0:
- ; OPT-LABEL: name: image_store
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: image_store
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: image_store
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
+ ; CHECK-NEXT: S_ENDPGM 0
IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7)
S_ENDPGM 0
...
@@ -491,17 +371,10 @@ body: |
name: buffer_atomic
body: |
bb.0:
- ; OPT-LABEL: name: buffer_atomic
- ; OPT: S_WAITCNT 0
- ; OPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
- ; OPT-NEXT: S_NOP 0
- ; OPT-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
- ; OPT-NEXT: S_ENDPGM 0
- ;
- ; NOOPT-LABEL: name: buffer_atomic
- ; NOOPT: S_WAITCNT 0
- ; NOOPT-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
- ; NOOPT-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: buffer_atomic
+ ; CHECK: S_WAITCNT 0
+ ; CHECK-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
+ ; CHECK-NEXT: S_ENDPGM 0
BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7)
S_ENDPGM 0
...
More information about the llvm-commits mailing list