[llvm] d5ab379 - AMDGPU: Add baseline test for broken machine sinking

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 18 03:15:56 PDT 2023


Author: Matt Arsenault
Date: 2023-07-18T06:15:50-04:00
New Revision: d5ab379506252f4955c74841f1e12caa97317a57

URL: https://github.com/llvm/llvm-project/commit/d5ab379506252f4955c74841f1e12caa97317a57
DIFF: https://github.com/llvm/llvm-project/commit/d5ab379506252f4955c74841f1e12caa97317a57.diff

LOG: AMDGPU: Add baseline test for broken machine sinking

Added: 
    llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
    llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir

Modified: 
    llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
new file mode 100644
index 00000000000000..b8e74bc7db09a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck %s
+
+; A VGPR loop variable was incorrectly sunk into a flow block, past
+; the si_end_cf reconvergence point.
+
+define void @machinesink_loop_variable_out_of_divergent_loop(i32 %arg, i1 %cmp49280.not, i32 %arg1, i1 %cmp108) {
+; CHECK-LABEL: machinesink_loop_variable_out_of_divergent_loop:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v1, 1, v1
+; CHECK-NEXT:    v_and_b32_e32 v3, 1, v3
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s4, 1, v1
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; CHECK-NEXT:    s_xor_b32 s6, s4, -1
+; CHECK-NEXT:    s_inst_prefetch 0x1
+; CHECK-NEXT:    s_branch .LBB0_3
+; CHECK-NEXT:    .p2align 6
+; CHECK-NEXT:  .LBB0_1: ; %Flow
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT:    v_add_nc_u32_e32 v4, -4, v4
+; CHECK-NEXT:  .LBB0_2: ; %Flow1
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s7
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s4, 0, v3
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; j lastloop entry
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_or_b32 s5, s4, s5
+; CHECK-NEXT:    s_andn2_b32 exec_lo, exec_lo, s5
+; CHECK-NEXT:    s_cbranch_execz .LBB0_8
+; CHECK-NEXT:  .LBB0_3: ; %for.body33
+; CHECK-NEXT:    ; =>This Loop Header: Depth=1
+; CHECK-NEXT:    ; Child Loop BB0_6 Depth 2
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    s_and_saveexec_b32 s7, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB0_2
+; CHECK-NEXT:  ; %bb.4: ; %for.body51.preheader
+; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    s_mov_b32 s8, 0
+; CHECK-NEXT:    s_mov_b32 s9, 0
+; CHECK-NEXT:    s_branch .LBB0_6
+; CHECK-NEXT:    .p2align 6
+; CHECK-NEXT:  .LBB0_5: ; %if.end118
+; CHECK-NEXT:    ; in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s4
+; CHECK-NEXT:    s_add_i32 s9, s9, 4
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; backedge
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_add_nc_u32_e32 v4, s9, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s4, v4, v0
+; CHECK-NEXT:    s_or_b32 s8, s4, s8
+; CHECK-NEXT:    s_andn2_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT:    s_cbranch_execz .LBB0_1
+; CHECK-NEXT:  .LBB0_6: ; %for.body51
+; CHECK-NEXT:    ; Parent Loop BB0_3 Depth=1
+; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    v_mov_b32_e32 v3, 1
+; CHECK-NEXT:    s_and_saveexec_b32 s4, vcc_lo
+; CHECK-NEXT:    s_cbranch_execz .LBB0_5
+; CHECK-NEXT:  ; %bb.7: ; %if.then112
+; CHECK-NEXT:    ; in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT:    s_add_i32 s10, s9, 4
+; CHECK-NEXT:    v_mov_b32_e32 v3, 0
+; CHECK-NEXT:    v_mov_b32_e32 v4, s10
+; CHECK-NEXT:    ds_write_b32 v1, v4
+; CHECK-NEXT:    s_branch .LBB0_5
+; CHECK-NEXT:  .LBB0_8: ; %for.body159.preheader
+; CHECK-NEXT:    s_inst_prefetch 0x2
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s5
+; CHECK-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT:  .LBB0_9: ; %for.body159
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_9
+; CHECK-NEXT:  ; %bb.10: ; %DummyReturnBlock
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  br label %for.body33
+
+for.body33:                                       ; preds = %for.end121, %entry
+  br i1 %cmp49280.not, label %for.end121, label %for.body51 ; outer loop header; the inner loop is skipped when %cmp49280.not is true
+
+for.body51:                                       ; preds = %if.end118, %for.body33
+  %add48284 = phi i32 [ %add48, %if.end118 ], [ %arg1, %for.body33 ] ; inner-loop variable, starts at %arg1 and advances by 4 per iteration
+  %collision.0281 = phi i32 [ %inc119, %if.end118 ], [ 1, %for.body33 ] ; second counter, starts at 1 and advances by 4 per iteration
+  br i1 %cmp108, label %if.then112, label %if.end118
+
+if.then112:                                       ; preds = %for.body51
+  %inc101 = add i32 %collision.0281, 3
+  store i32 %inc101, ptr addrspace(3) null, align 2147483648 ; writes %collision.0281 + 3 to the null address in addrspace(3)
+  br label %if.end118
+
+if.end118:                                        ; preds = %if.then112, %for.body51
+  %thCollNum.5 = phi i32 [ 0, %if.then112 ], [ 1, %for.body51 ] ; 0 when arriving from if.then112, 1 when coming straight from for.body51
+  %inc119 = add i32 %collision.0281, 4
+  tail call void asm sideeffect "; backedge", ""()
+  %add48 = add i32 %add48284, 4
+  %cmp49 = icmp ult i32 %add48, %arg
+  br i1 %cmp49, label %for.body51, label %for.end121 ; inner loop continues while %add48 is unsigned-less-than %arg
+
+for.end121:                                       ; preds = %if.end118, %for.body33
+  %thCollNum.1.lcssa = phi i32 [ 0, %for.body33 ], [ %thCollNum.5, %if.end118 ]
+  %j.0.lcssa = phi i32 [ 0, %for.body33 ], [ %add48284, %if.end118 ] ; on inner-loop exit this takes %add48284, i.e. the value from before the final +4; presumably the loop variable this file's header comment refers to -- confirm
+  %i5 = tail call i32 asm sideeffect "; j lastloop entry", "=v,0"(i32 %j.0.lcssa) ; only use of %j.0.lcssa; the input is tied to the "=v" output
+  %cmp31 = icmp eq i32 %thCollNum.1.lcssa, 0
+  br i1 %cmp31, label %for.body33, label %for.body159 ; outer loop repeats while %thCollNum.1.lcssa is 0
+
+for.body159:                                      ; preds = %for.body159, %for.end121
+  br label %for.body159 ; unconditional self-loop
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir
new file mode 100644
index 00000000000000..037a285794120d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir
@@ -0,0 +1,225 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o -  %s | FileCheck %s
+
+# A VGPR loop variable was incorrectly sunk into a flow block, past
+# the si_end_cf reconvergence point.
+
+---
+name:            machinesink_loop_vgpr_out_of_divergent_loop
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: machinesink_loop_vgpr_out_of_divergent_loop
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.6(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
+  ; CHECK-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc
+  ; CHECK-NEXT:   SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]]
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.7(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   successors: %bb.7(0x40000000), %bb.8(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8:
+  ; CHECK-NEXT:   SI_RETURN
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr8
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = COPY $sgpr8
+    %2:vgpr_32 = COPY $vgpr1
+
+  bb.1:
+    %3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    %4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_NOP 0
+
+  bb.4:
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    %5:vgpr_32 = V_ADD_U32_e64 %0, %1, 0, implicit $exec ; defined here in bb.4; its only use is the INLINEASM in bb.5
+    %6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc
+    SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; branches back to bb.2, otherwise control continues to bb.5
+    S_BRANCH %bb.5
+
+  bb.5:
+    %7:vgpr_32 = PHI %0, %bb.4 ; %7 has no uses in this function
+    SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; takes the SI_IF_BREAK result %6
+    INLINEASM &"", 1, implicit %5 ; the expected output above has the V_ADD_U32_e64 placed directly before this use, after SI_END_CF
+    S_BRANCH %bb.2
+
+  bb.6:
+    SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.7
+
+  bb.7:
+    S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+    S_BRANCH %bb.8
+
+  bb.8:
+    SI_RETURN
+
+...
+
+# The same testcase, except the relevant instruction is scalar and
+# could be legally sunk.
+---
+name:            machinesink_loop_sgpr_out_of_divergent_loop
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: machinesink_loop_sgpr_out_of_divergent_loop
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr8, $sgpr9, $sgpr10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr9
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.6(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.5(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
+  ; CHECK-NEXT:   [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc
+  ; CHECK-NEXT:   SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4
+  ; CHECK-NEXT:   SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def dead $scc
+  ; CHECK-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[S_ADD_I32_]]
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.7(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   successors: %bb.7(0x40000000), %bb.8(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8:
+  ; CHECK-NEXT:   SI_RETURN
+  bb.0:
+    liveins: $sgpr8, $sgpr9, $sgpr10
+
+    %0:sreg_32 = COPY $sgpr8
+    %1:sreg_32 = COPY $sgpr9
+    %2:sreg_32 = COPY $sgpr10
+
+  bb.1:
+    %3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    %4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_NOP 0
+
+  bb.4:
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    %5:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc ; scalar add defined here in bb.4; its only use is the INLINEASM in bb.5
+    %6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc
+    SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; branches back to bb.2, otherwise control continues to bb.5
+    S_BRANCH %bb.5
+
+  bb.5:
+    %7:vgpr_32 = PHI %0, %bb.4 ; NOTE(review): %7 is vgpr_32 but its incoming %0 is sreg_32 in this function, and %7 has no uses -- looks carried over from the VGPR variant; confirm it is intended
+    SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; takes the SI_IF_BREAK result %6
+    INLINEASM &"", 1, implicit %5 ; the expected output above has the S_ADD_I32 placed directly before this use, after SI_END_CF
+    S_BRANCH %bb.2
+
+  bb.6:
+    SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.7
+
+  bb.7:
+    S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+    S_BRANCH %bb.8
+
+  bb.8:
+    SI_RETURN
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
index 16edbf1d833782..e354d928e73a11 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
+++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
@@ -211,3 +211,277 @@ body:             |
     S_ENDPGM 0
 
 ...
+
+---
+name:            machinesink_loop_vgpr_out_of_divergent_loop_postra
+tracksRegLiveness: true
+body:             |
+  ; GFX10-LABEL: name: machinesink_loop_vgpr_out_of_divergent_loop_postra
+  ; GFX10: bb.0:
+  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr8, $vgpr0, $vgpr1
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT:   successors: %bb.7(0x40000000), %bb.2(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr8, $vgpr0
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT:   renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.7, implicit $exec
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr8, $vgpr0
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.3:
+  ; GFX10-NEXT:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr8, $vgpr0
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT:   renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.4
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.4:
+  ; GFX10-NEXT:   successors: %bb.5(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.5:
+  ; GFX10-NEXT:   successors: %bb.3(0x40000000), %bb.6(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   renamable $vgpr1 = V_ADD_U32_e64 $sgpr8, $vgpr0, 0, implicit $exec
+  ; GFX10-NEXT:   renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+  ; GFX10-NEXT:   renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+  ; GFX10-NEXT:   $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.6
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.6:
+  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0, $vgpr1
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+  ; GFX10-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $vgpr1
+  ; GFX10-NEXT:   S_BRANCH %bb.3
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.7:
+  ; GFX10-NEXT:   successors: %bb.1(0x40000000), %bb.8(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr8, $vgpr0
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.8:
+  ; GFX10-NEXT:   successors: %bb.9(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.9:
+  ; GFX10-NEXT:   successors: %bb.9(0x40000000), %bb.10(0x40000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   S_CBRANCH_VCCNZ %bb.9, implicit undef $vcc
+  ; GFX10-NEXT:   S_BRANCH %bb.10
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.10:
+  ; GFX10-NEXT:   SI_RETURN
+  bb.0:
+    liveins: $sgpr8, $vgpr0, $vgpr1
+
+  bb.1:
+    liveins: $sgpr8, $vgpr0
+
+    $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+    S_CBRANCH_EXECZ %bb.6, implicit $exec ; input bb.6 is printed as bb.7 in the expected output above, which numbers blocks in layout order
+
+  bb.9:
+    liveins: $sgpr4, $sgpr8, $vgpr0
+
+
+  bb.2:
+    liveins: $sgpr4, $sgpr8, $vgpr0
+
+    $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+    S_CBRANCH_EXECZ %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+
+
+  bb.4:
+    liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+
+    renamable $vgpr1 = V_ADD_U32_e64 $sgpr8, $vgpr0, 0, implicit $exec ; writes $vgpr1 in the block that branches back to bb.2; the expected output above keeps it in this block (printed there as bb.5)
+    renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+    renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+
+    $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.2, implicit $exec ; back-edge to bb.2
+    S_BRANCH %bb.5
+
+  bb.5:
+    liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0, $vgpr1
+
+    $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc ; ORs the mask held in $sgpr5 back into $exec_lo
+    INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $vgpr1 ; only reader of the $vgpr1 value written in bb.4
+    S_BRANCH %bb.2
+
+  bb.6:
+    liveins: $sgpr4, $sgpr8, $vgpr0
+
+    $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+  bb.10:
+
+  bb.7:
+    S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+    S_BRANCH %bb.8
+
+  bb.8:
+    SI_RETURN
+
+...
+
+---
+name:            machinesink_loop_sgpr_out_of_divergent_loop_postra
+tracksRegLiveness: true
+body:             |
+  ; GFX10-LABEL: name: machinesink_loop_sgpr_out_of_divergent_loop_postra
+  ; GFX10: bb.0:
+  ; GFX10-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr8, $sgpr9, $sgpr10
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.1:
+  ; GFX10-NEXT:   successors: %bb.7(0x40000000), %bb.2(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr8, $sgpr9
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT:   renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.7, implicit $exec
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr8, $sgpr9
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.3:
+  ; GFX10-NEXT:   successors: %bb.5(0x40000000), %bb.4(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr8, $sgpr9
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT:   renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.4
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.4:
+  ; GFX10-NEXT:   successors: %bb.5(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.5:
+  ; GFX10-NEXT:   successors: %bb.3(0x40000000), %bb.6(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   renamable $sgpr6 = S_ADD_I32 renamable $sgpr8, renamable $sgpr9, implicit-def dead $scc
+  ; GFX10-NEXT:   renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+  ; GFX10-NEXT:   renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+  ; GFX10-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */
+  ; GFX10-NEXT:   $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
+  ; GFX10-NEXT:   S_BRANCH %bb.6
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.6:
+  ; GFX10-NEXT:   successors: %bb.3(0x80000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr8, $sgpr9
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+  ; GFX10-NEXT:   INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $sgpr6
+  ; GFX10-NEXT:   S_BRANCH %bb.3
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.7:
+  ; GFX10-NEXT:   successors: %bb.1(0x40000000), %bb.8(0x40000000)
+  ; GFX10-NEXT:   liveins: $sgpr4, $sgpr8, $sgpr9
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+  ; GFX10-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.8:
+  ; GFX10-NEXT:   successors: %bb.9(0x80000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.9:
+  ; GFX10-NEXT:   successors: %bb.9(0x40000000), %bb.10(0x40000000)
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT:   S_CBRANCH_VCCNZ %bb.9, implicit undef $vcc
+  ; GFX10-NEXT:   S_BRANCH %bb.10
+  ; GFX10-NEXT: {{  $}}
+  ; GFX10-NEXT: bb.10:
+  ; GFX10-NEXT:   SI_RETURN
+  bb.0:
+    liveins: $sgpr8, $sgpr9, $sgpr10
+
+
+  bb.1:
+    liveins: $sgpr8, $sgpr9
+
+    $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+    S_CBRANCH_EXECZ %bb.6, implicit $exec ; input bb.6 is printed as bb.7 in the expected output above, which numbers blocks in layout order
+
+  bb.9:
+    liveins: $sgpr4, $sgpr8, $sgpr9
+
+
+  bb.2:
+    liveins: $sgpr4, $sgpr8, $sgpr9
+
+    $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+    renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+    S_CBRANCH_EXECZ %bb.4, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.3:
+    liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+
+
+  bb.4:
+    liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+
+    renamable $sgpr6 = S_ADD_I32 renamable $sgpr8, renamable $sgpr9, implicit-def dead $scc ; writes $sgpr6 in the block that branches back to bb.2; the expected output above keeps it in this block (printed there as bb.5)
+    renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+    renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.2, implicit $exec ; back-edge to bb.2
+    S_BRANCH %bb.5
+
+  bb.5:
+    liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr8, $sgpr9
+
+    $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc ; ORs the mask held in $sgpr5 back into $exec_lo
+    INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $sgpr6 ; only reader of the $sgpr6 value written in bb.4
+    S_BRANCH %bb.2
+
+  bb.6:
+    liveins: $sgpr4, $sgpr8, $sgpr9
+
+    $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+  bb.10:
+
+  bb.7:
+    S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+    S_BRANCH %bb.8
+
+  bb.8:
+    SI_RETURN
+
+...


        


More information about the llvm-commits mailing list