[llvm] [AMDGPU] Ensure that V_SET_INACTIVE inactive input is WWM computed (PR #98858)
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 14 22:58:33 PDT 2024
https://github.com/perlfu created https://github.com/llvm/llvm-project/pull/98858
The WWM global flag (StateStrictWWM) must be set to ensure that the inactive-lane input of V_SET_INACTIVE is computed in WWM.
If the global flag is not present, the full lowering may be skipped.
From 574004452bc1a3ce1e0be12764ffb67e4d3cb668 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson at amd.com>
Date: Sun, 14 Jul 2024 11:08:21 +0900
Subject: [PATCH] [AMDGPU] Ensure that V_SET_INACTIVE inactive input is WWM
computed
WWM global flag must be set to ensure V_SET_INACTIVE inactive lane
input is computed in WWM.
Full lowering may be skipped if global flag is not present.
---
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 1 +
.../GlobalISel/llvm.amdgcn.set.inactive.ll | 124 ++++++-----
.../llvm.amdgcn.set.inactive.chain.arg.ll | 192 +++++++++++++-----
3 files changed, 217 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 19e761ef45b25..188c69a8733f9 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -564,6 +564,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
}
}
SetInactiveInstrs.push_back(&MI);
+ GlobalFlags |= StateStrictWWM;
} else if (TII->isDisableWQM(MI)) {
BBI.Needs |= StateExact;
if (!(BBI.InNeeds & StateExact)) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index 0c60be9d94591..db79d27759fd6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -96,14 +96,16 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
; GCN-LABEL: set_inactive_f32:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
+; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0x40400000
+; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v1
+; GCN-NEXT: v_mov_b32_e32 v0, v0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -117,16 +119,18 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
; GCN-LABEL: set_inactive_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GCN-NEXT: s_mov_b32 s4, 0xcccccccd
-; GCN-NEXT: s_mov_b32 s5, 0x4010cccc
-; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_mov_b32_e32 v3, s5
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: s_mov_b32 s6, 0xcccccccd
+; GCN-NEXT: s_mov_b32 s7, 0x4010cccc
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: v_mov_b32_e32 v1, s7
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, v3
+; GCN-NEXT: v_mov_b32_e32 v0, v0
+; GCN-NEXT: v_mov_b32_e32 v1, v1
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -140,14 +144,16 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) {
; GCN-LABEL: set_inactive_v2i16:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GCN-NEXT: v_mov_b32_e32 v1, 0x10001
+; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0x10001
+; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v1
+; GCN-NEXT: v_mov_b32_e32 v0, v0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -160,14 +166,16 @@ define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %
define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) {
; GCN-LABEL: set_inactive_v2f16:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GCN-NEXT: v_mov_b32_e32 v1, 0x3c003c00
+; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3c003c00
+; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v1
+; GCN-NEXT: v_mov_b32_e32 v0, v0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -181,16 +189,18 @@ define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %
; GCN-LABEL: set_inactive_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GCN-NEXT: s_mov_b32 s4, 1
-; GCN-NEXT: s_mov_b32 s5, s4
-; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_mov_b32_e32 v3, s5
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: s_mov_b32 s6, 1
+; GCN-NEXT: s_mov_b32 s7, s6
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: v_mov_b32_e32 v1, s7
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, v3
+; GCN-NEXT: v_mov_b32_e32 v0, v0
+; GCN-NEXT: v_mov_b32_e32 v1, v1
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -205,16 +215,18 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
; GCN-LABEL: set_inactive_v2f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GCN-NEXT: s_mov_b32 s4, 1.0
-; GCN-NEXT: s_mov_b32 s5, s4
-; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_mov_b32_e32 v3, s5
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: s_mov_b32 s6, 1.0
+; GCN-NEXT: s_mov_b32 s7, s6
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: v_mov_b32_e32 v1, s7
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, v3
+; GCN-NEXT: v_mov_b32_e32 v0, v0
+; GCN-NEXT: v_mov_b32_e32 v1, v1
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -228,14 +240,16 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in) {
; GCN-LABEL: set_inactive_v2bf16:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
+; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GCN-NEXT: v_mov_b32_e32 v1, 0x3f803f80
+; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
+; GCN-NEXT: v_mov_b32_e32 v0, 0x3f803f80
+; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v1
+; GCN-NEXT: v_mov_b32_e32 v0, v0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
@@ -249,16 +263,18 @@ define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %
; GCN-LABEL: set_inactive_v4i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GCN-NEXT: s_mov_b32 s4, 0x10001
-; GCN-NEXT: s_mov_b32 s5, s4
-; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_mov_b32_e32 v3, s5
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: s_mov_b32 s6, 0x10001
+; GCN-NEXT: s_mov_b32 s7, s6
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: v_mov_b32_e32 v1, s7
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, v3
+; GCN-NEXT: v_mov_b32_e32 v0, v0
+; GCN-NEXT: v_mov_b32_e32 v1, v1
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -273,16 +289,18 @@ define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half>
; GCN-LABEL: set_inactive_v4f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
-; GCN-NEXT: s_mov_b32 s5, s4
-; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_mov_b32_e32 v3, s5
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: s_mov_b32 s6, 0x3c003c00
+; GCN-NEXT: s_mov_b32 s7, s6
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: v_mov_b32_e32 v1, s7
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, v3
+; GCN-NEXT: v_mov_b32_e32 v0, v0
+; GCN-NEXT: v_mov_b32_e32 v1, v1
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
@@ -297,16 +315,18 @@ define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloa
; GCN-LABEL: set_inactive_v4bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GCN-NEXT: s_mov_b32 s4, 0x3f803f80
-; GCN-NEXT: s_mov_b32 s5, s4
-; GCN-NEXT: v_mov_b32_e32 v2, s4
-; GCN-NEXT: v_mov_b32_e32 v3, s5
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: s_mov_b32 s6, 0x3f803f80
+; GCN-NEXT: s_mov_b32 s7, s6
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: v_mov_b32_e32 v1, s7
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_not_b64 exec, exec
-; GCN-NEXT: v_mov_b32_e32 v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, v3
+; GCN-NEXT: v_mov_b32_e32 v0, v0
+; GCN-NEXT: v_mov_b32_e32 v1, v1
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
index b3acd4949301e..e6acd9996b09a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
@@ -12,9 +12,13 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX11-LABEL: set_inactive_chain_arg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_or_saveexec_b32 s0, -1
+; GFX11-NEXT: v_mov_b32_e32 v0, v10
+; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_mov_b32_e32 v0, v11
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX11-NEXT: v_mov_b32_e32 v0, v10
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_mov_b32_e32 v0, v0
; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
; GFX11-NEXT: global_store_b32 v[8:9], v0, off
; GFX11-NEXT: s_endpgm
@@ -22,9 +26,12 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX10-LABEL: set_inactive_chain_arg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_or_saveexec_b32 s0, -1
+; GFX10-NEXT: v_mov_b32_e32 v0, v10
+; GFX10-NEXT: s_mov_b32 exec_lo, s0
; GFX10-NEXT: v_mov_b32_e32 v0, v11
; GFX10-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX10-NEXT: v_mov_b32_e32 v0, v10
+; GFX10-NEXT: v_mov_b32_e32 v0, v0
; GFX10-NEXT: s_not_b32 exec_lo, exec_lo
; GFX10-NEXT: global_store_dword v[8:9], v0, off
; GFX10-NEXT: s_endpgm
@@ -32,9 +39,13 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX11_W64-LABEL: set_inactive_chain_arg:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
+; GFX11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX11_W64-NEXT: v_mov_b32_e32 v0, v11
; GFX11_W64-NEXT: s_not_b64 exec, exec
-; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
+; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11_W64-NEXT: v_mov_b32_e32 v0, v0
; GFX11_W64-NEXT: s_not_b64 exec, exec
; GFX11_W64-NEXT: global_store_b32 v[8:9], v0, off
; GFX11_W64-NEXT: s_endpgm
@@ -42,9 +53,12 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
; GFX10_W64-LABEL: set_inactive_chain_arg:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GFX10_W64-NEXT: v_mov_b32_e32 v0, v10
+; GFX10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX10_W64-NEXT: v_mov_b32_e32 v0, v11
; GFX10_W64-NEXT: s_not_b64 exec, exec
-; GFX10_W64-NEXT: v_mov_b32_e32 v0, v10
+; GFX10_W64-NEXT: v_mov_b32_e32 v0, v0
; GFX10_W64-NEXT: s_not_b64 exec, exec
; GFX10_W64-NEXT: global_store_dword v[8:9], v0, off
; GFX10_W64-NEXT: s_endpgm
@@ -54,53 +68,135 @@ define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %
}
define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i64 %inactive, i64 %active) {
-; GFX11-LABEL: set_inactive_chain_arg_64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_mov_b32_e32 v0, v12
-; GFX11-NEXT: v_mov_b32_e32 v1, v13
-; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX11-NEXT: v_mov_b32_e32 v0, v10
-; GFX11-NEXT: v_mov_b32_e32 v1, v11
-; GFX11-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX11-NEXT: global_store_b64 v[8:9], v[0:1], off
-; GFX11-NEXT: s_endpgm
+; GISEL11-LABEL: set_inactive_chain_arg_64:
+; GISEL11: ; %bb.0:
+; GISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL11-NEXT: s_or_saveexec_b32 s0, -1
+; GISEL11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_mov_b32 v1, v11
+; GISEL11-NEXT: s_mov_b32 exec_lo, s0
+; GISEL11-NEXT: v_mov_b32_e32 v0, v12
+; GISEL11-NEXT: v_mov_b32_e32 v1, v13
+; GISEL11-NEXT: s_not_b32 exec_lo, exec_lo
+; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL11-NEXT: v_mov_b32_e32 v0, v0
+; GISEL11-NEXT: v_mov_b32_e32 v1, v1
+; GISEL11-NEXT: s_not_b32 exec_lo, exec_lo
+; GISEL11-NEXT: global_store_b64 v[8:9], v[0:1], off
+; GISEL11-NEXT: s_endpgm
;
-; GFX10-LABEL: set_inactive_chain_arg_64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v0, v12
-; GFX10-NEXT: v_mov_b32_e32 v1, v13
-; GFX10-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX10-NEXT: v_mov_b32_e32 v0, v10
-; GFX10-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-NEXT: s_not_b32 exec_lo, exec_lo
-; GFX10-NEXT: global_store_dwordx2 v[8:9], v[0:1], off
-; GFX10-NEXT: s_endpgm
+; DAGISEL11-LABEL: set_inactive_chain_arg_64:
+; DAGISEL11: ; %bb.0:
+; DAGISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL11-NEXT: s_or_saveexec_b32 s0, -1
+; DAGISEL11-NEXT: v_dual_mov_b32 v1, v11 :: v_dual_mov_b32 v0, v10
+; DAGISEL11-NEXT: s_mov_b32 exec_lo, s0
+; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
+; DAGISEL11-NEXT: v_mov_b32_e32 v1, v13
+; DAGISEL11-NEXT: s_not_b32 exec_lo, exec_lo
+; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; DAGISEL11-NEXT: v_mov_b32_e32 v0, v0
+; DAGISEL11-NEXT: v_mov_b32_e32 v1, v1
+; DAGISEL11-NEXT: s_not_b32 exec_lo, exec_lo
+; DAGISEL11-NEXT: global_store_b64 v[8:9], v[0:1], off
+; DAGISEL11-NEXT: s_endpgm
;
-; GFX11_W64-LABEL: set_inactive_chain_arg_64:
-; GFX11_W64: ; %bb.0:
-; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11_W64-NEXT: v_mov_b32_e32 v0, v12
-; GFX11_W64-NEXT: v_mov_b32_e32 v1, v13
-; GFX11_W64-NEXT: s_not_b64 exec, exec
-; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
-; GFX11_W64-NEXT: v_mov_b32_e32 v1, v11
-; GFX11_W64-NEXT: s_not_b64 exec, exec
-; GFX11_W64-NEXT: global_store_b64 v[8:9], v[0:1], off
-; GFX11_W64-NEXT: s_endpgm
+; GISEL10-LABEL: set_inactive_chain_arg_64:
+; GISEL10: ; %bb.0:
+; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL10-NEXT: s_or_saveexec_b32 s0, -1
+; GISEL10-NEXT: v_mov_b32_e32 v0, v10
+; GISEL10-NEXT: v_mov_b32_e32 v1, v11
+; GISEL10-NEXT: s_mov_b32 exec_lo, s0
+; GISEL10-NEXT: v_mov_b32_e32 v0, v12
+; GISEL10-NEXT: v_mov_b32_e32 v1, v13
+; GISEL10-NEXT: s_not_b32 exec_lo, exec_lo
+; GISEL10-NEXT: v_mov_b32_e32 v0, v0
+; GISEL10-NEXT: v_mov_b32_e32 v1, v1
+; GISEL10-NEXT: s_not_b32 exec_lo, exec_lo
+; GISEL10-NEXT: global_store_dwordx2 v[8:9], v[0:1], off
+; GISEL10-NEXT: s_endpgm
;
-; GFX10_W64-LABEL: set_inactive_chain_arg_64:
-; GFX10_W64: ; %bb.0:
-; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10_W64-NEXT: v_mov_b32_e32 v0, v12
-; GFX10_W64-NEXT: v_mov_b32_e32 v1, v13
-; GFX10_W64-NEXT: s_not_b64 exec, exec
-; GFX10_W64-NEXT: v_mov_b32_e32 v0, v10
-; GFX10_W64-NEXT: v_mov_b32_e32 v1, v11
-; GFX10_W64-NEXT: s_not_b64 exec, exec
-; GFX10_W64-NEXT: global_store_dwordx2 v[8:9], v[0:1], off
-; GFX10_W64-NEXT: s_endpgm
+; DAGISEL10-LABEL: set_inactive_chain_arg_64:
+; DAGISEL10: ; %bb.0:
+; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL10-NEXT: s_or_saveexec_b32 s0, -1
+; DAGISEL10-NEXT: v_mov_b32_e32 v1, v11
+; DAGISEL10-NEXT: v_mov_b32_e32 v0, v10
+; DAGISEL10-NEXT: s_mov_b32 exec_lo, s0
+; DAGISEL10-NEXT: v_mov_b32_e32 v0, v12
+; DAGISEL10-NEXT: v_mov_b32_e32 v1, v13
+; DAGISEL10-NEXT: s_not_b32 exec_lo, exec_lo
+; DAGISEL10-NEXT: v_mov_b32_e32 v0, v0
+; DAGISEL10-NEXT: v_mov_b32_e32 v1, v1
+; DAGISEL10-NEXT: s_not_b32 exec_lo, exec_lo
+; DAGISEL10-NEXT: global_store_dwordx2 v[8:9], v[0:1], off
+; DAGISEL10-NEXT: s_endpgm
+;
+; GISEL11_W64-LABEL: set_inactive_chain_arg_64:
+; GISEL11_W64: ; %bb.0:
+; GISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v10
+; GISEL11_W64-NEXT: v_mov_b32_e32 v1, v11
+; GISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
+; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
+; GISEL11_W64-NEXT: v_mov_b32_e32 v1, v13
+; GISEL11_W64-NEXT: s_not_b64 exec, exec
+; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v0
+; GISEL11_W64-NEXT: v_mov_b32_e32 v1, v1
+; GISEL11_W64-NEXT: s_not_b64 exec, exec
+; GISEL11_W64-NEXT: global_store_b64 v[8:9], v[0:1], off
+; GISEL11_W64-NEXT: s_endpgm
+;
+; DAGISEL11_W64-LABEL: set_inactive_chain_arg_64:
+; DAGISEL11_W64: ; %bb.0:
+; DAGISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
+; DAGISEL11_W64-NEXT: v_mov_b32_e32 v1, v11
+; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v10
+; DAGISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
+; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
+; DAGISEL11_W64-NEXT: v_mov_b32_e32 v1, v13
+; DAGISEL11_W64-NEXT: s_not_b64 exec, exec
+; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v0
+; DAGISEL11_W64-NEXT: v_mov_b32_e32 v1, v1
+; DAGISEL11_W64-NEXT: s_not_b64 exec, exec
+; DAGISEL11_W64-NEXT: global_store_b64 v[8:9], v[0:1], off
+; DAGISEL11_W64-NEXT: s_endpgm
+;
+; GISEL10_W64-LABEL: set_inactive_chain_arg_64:
+; GISEL10_W64: ; %bb.0:
+; GISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
+; GISEL10_W64-NEXT: v_mov_b32_e32 v0, v10
+; GISEL10_W64-NEXT: v_mov_b32_e32 v1, v11
+; GISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
+; GISEL10_W64-NEXT: v_mov_b32_e32 v0, v12
+; GISEL10_W64-NEXT: v_mov_b32_e32 v1, v13
+; GISEL10_W64-NEXT: s_not_b64 exec, exec
+; GISEL10_W64-NEXT: v_mov_b32_e32 v0, v0
+; GISEL10_W64-NEXT: v_mov_b32_e32 v1, v1
+; GISEL10_W64-NEXT: s_not_b64 exec, exec
+; GISEL10_W64-NEXT: global_store_dwordx2 v[8:9], v[0:1], off
+; GISEL10_W64-NEXT: s_endpgm
+;
+; DAGISEL10_W64-LABEL: set_inactive_chain_arg_64:
+; DAGISEL10_W64: ; %bb.0:
+; DAGISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
+; DAGISEL10_W64-NEXT: v_mov_b32_e32 v1, v11
+; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, v10
+; DAGISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
+; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, v12
+; DAGISEL10_W64-NEXT: v_mov_b32_e32 v1, v13
+; DAGISEL10_W64-NEXT: s_not_b64 exec, exec
+; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, v0
+; DAGISEL10_W64-NEXT: v_mov_b32_e32 v1, v1
+; DAGISEL10_W64-NEXT: s_not_b64 exec, exec
+; DAGISEL10_W64-NEXT: global_store_dwordx2 v[8:9], v[0:1], off
+; DAGISEL10_W64-NEXT: s_endpgm
%tmp = call i64 @llvm.amdgcn.set.inactive.chain.arg.i64(i64 %active, i64 %inactive) #0
store i64 %tmp, ptr addrspace(1) %out
ret void
More information about the llvm-commits
mailing list