[llvm] 2ab6835 - AMDGPU: mark SET_INACTIVE_* as convergent operation

Ruiling Song via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 6 18:11:01 PDT 2023


Author: Ruiling Song
Date: 2023-04-07T09:10:43+08:00
New Revision: 2ab6835f285fbb231e90658c1c8c727defb03d9a

URL: https://github.com/llvm/llvm-project/commit/2ab6835f285fbb231e90658c1c8c727defb03d9a
DIFF: https://github.com/llvm/llvm-project/commit/2ab6835f285fbb231e90658c1c8c727defb03d9a.diff

LOG: AMDGPU: mark SET_INACTIVE_* as convergent operation

set_inactive is effectively an operation that passes a certain value
from active threads to inactive threads. In a later WWM operation, the
threads that were previously disabled and are now activated read the
values passed to them by the set_inactive operation. So I think
set_inactive is a convergent operation.

Differential Revision: https://reviews.llvm.org/D147683

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 61a1ef44b5418..40adc6178c214 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -228,7 +228,7 @@ def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
 
 // Invert the exec mask and overwrite the inactive lanes of dst with inactive,
 // restoring it after we're done.
-let Defs = [SCC] in {
+let Defs = [SCC], isConvergent = 1 in {
 def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
   (ins VSrc_b32: $src, VSrc_b32:$inactive),
   [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {

diff  --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
index b7ae33f55f5d2..7738a2daecc9e 100644
--- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -5,34 +5,34 @@ define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 i
 ; GCN-LABEL: should_not_hoist_set_inactive:
 ; GCN:       ; %bb.0: ; %.entry
 ; GCN-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 3, v1
-; GCN-NEXT:    v_cmp_eq_u32_e64 s6, 0, v0
-; GCN-NEXT:    v_cmp_ne_u32_e64 s5, 0, v2
-; GCN-NEXT:    v_mov_b32_e32 v3, s4
-; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
-; GCN-NEXT:    v_mov_b32_e32 v3, 0
-; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
-; GCN-NEXT:    s_mov_b32 s4, 0
+; GCN-NEXT:    v_cmp_eq_u32_e64 s5, 0, v0
+; GCN-NEXT:    v_cmp_ne_u32_e64 s6, 0, v2
+; GCN-NEXT:    s_mov_b32 s7, 0
 ; GCN-NEXT:    s_branch .LBB0_2
 ; GCN-NEXT:  .LBB0_1: ; %bb4
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; GCN-NEXT:    s_waitcnt_depctr 0xffe3
-; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s7
-; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s4
+; GCN-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; GCN-NEXT:    s_andn2_b32 exec_lo, exec_lo, s7
 ; GCN-NEXT:    s_cbranch_execz .LBB0_5
 ; GCN-NEXT:  .LBB0_2: ; %bb
 ; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    s_and_b32 s7, exec_lo, s5
-; GCN-NEXT:    s_or_b32 s4, s7, s4
-; GCN-NEXT:    s_and_saveexec_b32 s7, vcc_lo
+; GCN-NEXT:    s_and_b32 s8, exec_lo, s6
+; GCN-NEXT:    s_or_b32 s7, s8, s7
+; GCN-NEXT:    s_and_saveexec_b32 s8, vcc_lo
 ; GCN-NEXT:    s_cbranch_execz .LBB0_1
 ; GCN-NEXT:  ; %bb.3: ; %bb1
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
-; GCN-NEXT:    s_or_saveexec_b32 s8, -1
+; GCN-NEXT:    v_mov_b32_e32 v3, s4
+; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
+; GCN-NEXT:    v_mov_b32_e32 v3, 0
+; GCN-NEXT:    s_not_b32 exec_lo, exec_lo
+; GCN-NEXT:    s_or_saveexec_b32 s9, -1
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    v_mov_b32_dpp v4, v3 row_xmask:1 row_mask:0xf bank_mask:0xf
-; GCN-NEXT:    s_mov_b32 exec_lo, s8
+; GCN-NEXT:    s_mov_b32 exec_lo, s9
 ; GCN-NEXT:    v_mov_b32_e32 v0, v4
-; GCN-NEXT:    s_and_b32 exec_lo, exec_lo, s6
+; GCN-NEXT:    s_and_b32 exec_lo, exec_lo, s5
 ; GCN-NEXT:    s_cbranch_execz .LBB0_1
 ; GCN-NEXT:  ; %bb.4: ; %bb2
 ; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1


        


More information about the llvm-commits mailing list