[PATCH] D147683: AMDGPU: mark SET_INACTIVE_* as convergent operation
Ruiling, Song via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 6 18:11:16 PDT 2023
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2ab6835f285f: AMDGPU: mark SET_INACTIVE_* as convergent operation (authored by ruiling).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D147683/new/
https://reviews.llvm.org/D147683
Files:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
Index: llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -5,34 +5,34 @@
; GCN-LABEL: should_not_hoist_set_inactive:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: v_cmp_gt_i32_e32 vcc_lo, 3, v1
-; GCN-NEXT: v_cmp_eq_u32_e64 s6, 0, v0
-; GCN-NEXT: v_cmp_ne_u32_e64 s5, 0, v2
-; GCN-NEXT: v_mov_b32_e32 v3, s4
-; GCN-NEXT: s_not_b32 exec_lo, exec_lo
-; GCN-NEXT: v_mov_b32_e32 v3, 0
-; GCN-NEXT: s_not_b32 exec_lo, exec_lo
-; GCN-NEXT: s_mov_b32 s4, 0
+; GCN-NEXT: v_cmp_eq_u32_e64 s5, 0, v0
+; GCN-NEXT: v_cmp_ne_u32_e64 s6, 0, v2
+; GCN-NEXT: s_mov_b32 s7, 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT: s_waitcnt_depctr 0xffe3
-; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s7
-; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
+; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s8
+; GCN-NEXT: s_andn2_b32 exec_lo, exec_lo, s7
; GCN-NEXT: s_cbranch_execz .LBB0_5
; GCN-NEXT: .LBB0_2: ; %bb
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT: s_and_b32 s7, exec_lo, s5
-; GCN-NEXT: s_or_b32 s4, s7, s4
-; GCN-NEXT: s_and_saveexec_b32 s7, vcc_lo
+; GCN-NEXT: s_and_b32 s8, exec_lo, s6
+; GCN-NEXT: s_or_b32 s7, s8, s7
+; GCN-NEXT: s_and_saveexec_b32 s8, vcc_lo
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.3: ; %bb1
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT: s_or_saveexec_b32 s8, -1
+; GCN-NEXT: v_mov_b32_e32 v3, s4
+; GCN-NEXT: s_not_b32 exec_lo, exec_lo
+; GCN-NEXT: v_mov_b32_e32 v3, 0
+; GCN-NEXT: s_not_b32 exec_lo, exec_lo
+; GCN-NEXT: s_or_saveexec_b32 s9, -1
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_mov_b32_dpp v4, v3 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GCN-NEXT: s_mov_b32 exec_lo, s8
+; GCN-NEXT: s_mov_b32 exec_lo, s9
; GCN-NEXT: v_mov_b32_e32 v0, v4
-; GCN-NEXT: s_and_b32 exec_lo, exec_lo, s6
+; GCN-NEXT: s_and_b32 exec_lo, exec_lo, s5
; GCN-NEXT: s_cbranch_execz .LBB0_1
; GCN-NEXT: ; %bb.4: ; %bb2
; GCN-NEXT: ; in Loop: Header=BB0_2 Depth=1
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -228,7 +228,7 @@
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
-let Defs = [SCC] in {
+let Defs = [SCC], isConvergent = 1 in {
def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VSrc_b32: $src, VSrc_b32:$inactive),
[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147683.511581.patch
Type: text/x-patch
Size: 2891 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230407/b8f856ec/attachment.bin>
More information about the llvm-commits mailing list