[llvm] d5ab379 - AMDGPU: Add baseline test for broken machine sinking
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 18 03:15:56 PDT 2023
Author: Matt Arsenault
Date: 2023-07-18T06:15:50-04:00
New Revision: d5ab379506252f4955c74841f1e12caa97317a57
URL: https://github.com/llvm/llvm-project/commit/d5ab379506252f4955c74841f1e12caa97317a57
DIFF: https://github.com/llvm/llvm-project/commit/d5ab379506252f4955c74841f1e12caa97317a57.diff
LOG: AMDGPU: Add baseline test for broken machine sinking
Added:
llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir
Modified:
llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
new file mode 100644
index 00000000000000..b8e74bc7db09a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck %s
+
+; A VGPR loop variable was incorrectly sunk into a flow block, past
+; the si_end_cf reconvergence point.
+
+define void @machinesink_loop_variable_out_of_divergent_loop(i32 %arg, i1 %cmp49280.not, i32 %arg1, i1 %cmp108) {
+; CHECK-LABEL: machinesink_loop_variable_out_of_divergent_loop:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v1, 1, v1
+; CHECK-NEXT: v_and_b32_e32 v3, 1, v3
+; CHECK-NEXT: s_mov_b32 s5, 0
+; CHECK-NEXT: v_cmp_eq_u32_e64 s4, 1, v1
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; CHECK-NEXT: s_xor_b32 s6, s4, -1
+; CHECK-NEXT: s_inst_prefetch 0x1
+; CHECK-NEXT: s_branch .LBB0_3
+; CHECK-NEXT: .p2align 6
+; CHECK-NEXT: .LBB0_1: ; %Flow
+; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT: v_add_nc_u32_e32 v4, -4, v4
+; CHECK-NEXT: .LBB0_2: ; %Flow1
+; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7
+; CHECK-NEXT: v_cmp_ne_u32_e64 s4, 0, v3
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; j lastloop entry
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_or_b32 s5, s4, s5
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
+; CHECK-NEXT: s_cbranch_execz .LBB0_8
+; CHECK-NEXT: .LBB0_3: ; %for.body33
+; CHECK-NEXT: ; =>This Loop Header: Depth=1
+; CHECK-NEXT: ; Child Loop BB0_6 Depth 2
+; CHECK-NEXT: v_mov_b32_e32 v4, 0
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
+; CHECK-NEXT: s_and_saveexec_b32 s7, s6
+; CHECK-NEXT: s_cbranch_execz .LBB0_2
+; CHECK-NEXT: ; %bb.4: ; %for.body51.preheader
+; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT: s_mov_b32 s8, 0
+; CHECK-NEXT: s_mov_b32 s9, 0
+; CHECK-NEXT: s_branch .LBB0_6
+; CHECK-NEXT: .p2align 6
+; CHECK-NEXT: .LBB0_5: ; %if.end118
+; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s4
+; CHECK-NEXT: s_add_i32 s9, s9, 4
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; backedge
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_add_nc_u32_e32 v4, s9, v2
+; CHECK-NEXT: v_cmp_ge_u32_e64 s4, v4, v0
+; CHECK-NEXT: s_or_b32 s8, s4, s8
+; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT: s_cbranch_execz .LBB0_1
+; CHECK-NEXT: .LBB0_6: ; %for.body51
+; CHECK-NEXT: ; Parent Loop BB0_3 Depth=1
+; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
+; CHECK-NEXT: v_mov_b32_e32 v3, 1
+; CHECK-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; CHECK-NEXT: s_cbranch_execz .LBB0_5
+; CHECK-NEXT: ; %bb.7: ; %if.then112
+; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
+; CHECK-NEXT: s_add_i32 s10, s9, 4
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
+; CHECK-NEXT: v_mov_b32_e32 v4, s10
+; CHECK-NEXT: ds_write_b32 v1, v4
+; CHECK-NEXT: s_branch .LBB0_5
+; CHECK-NEXT: .LBB0_8: ; %for.body159.preheader
+; CHECK-NEXT: s_inst_prefetch 0x2
+; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5
+; CHECK-NEXT: s_mov_b32 vcc_lo, exec_lo
+; CHECK-NEXT: .LBB0_9: ; %for.body159
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_9
+; CHECK-NEXT: ; %bb.10: ; %DummyReturnBlock
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ br label %for.body33
+
+for.body33: ; preds = %for.end121, %entry
+ br i1 %cmp49280.not, label %for.end121, label %for.body51
+
+for.body51: ; preds = %if.end118, %for.body33
+ %add48284 = phi i32 [ %add48, %if.end118 ], [ %arg1, %for.body33 ]
+ %collision.0281 = phi i32 [ %inc119, %if.end118 ], [ 1, %for.body33 ]
+ br i1 %cmp108, label %if.then112, label %if.end118
+
+if.then112: ; preds = %for.body51
+ %inc101 = add i32 %collision.0281, 3
+ store i32 %inc101, ptr addrspace(3) null, align 2147483648
+ br label %if.end118
+
+if.end118: ; preds = %if.then112, %for.body51
+ %thCollNum.5 = phi i32 [ 0, %if.then112 ], [ 1, %for.body51 ]
+ %inc119 = add i32 %collision.0281, 4
+ tail call void asm sideeffect "; backedge", ""()
+ %add48 = add i32 %add48284, 4
+ %cmp49 = icmp ult i32 %add48, %arg
+ br i1 %cmp49, label %for.body51, label %for.end121
+
+for.end121: ; preds = %if.end118, %for.body33
+ %thCollNum.1.lcssa = phi i32 [ 0, %for.body33 ], [ %thCollNum.5, %if.end118 ]
+ %j.0.lcssa = phi i32 [ 0, %for.body33 ], [ %add48284, %if.end118 ]
+ %i5 = tail call i32 asm sideeffect "; j lastloop entry", "=v,0"(i32 %j.0.lcssa)
+ %cmp31 = icmp eq i32 %thCollNum.1.lcssa, 0
+ br i1 %cmp31, label %for.body33, label %for.body159
+
+for.body159: ; preds = %for.body159, %for.end121
+ br label %for.body159
+}
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir
new file mode 100644
index 00000000000000..037a285794120d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-loop-var-out-of-divergent-loop-swdev407790.mir
@@ -0,0 +1,225 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck %s
+
+# A VGPR loop variable was incorrectly sunk into a flow block, past
+# the si_end_cf reconvergence point.
+
+---
+name: machinesink_loop_vgpr_out_of_divergent_loop
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ ; CHECK-LABEL: name: machinesink_loop_vgpr_out_of_divergent_loop
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
+ ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc
+ ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4
+ ; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]]
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: SI_RETURN
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr8
+
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_32 = COPY $sgpr8
+ %2:vgpr_32 = COPY $vgpr1
+
+ bb.1:
+ %3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ %4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_NOP 0
+
+ bb.4:
+ INLINEASM &"", 1 /* sideeffect attdialect */
+ %5:vgpr_32 = V_ADD_U32_e64 %0, %1, 0, implicit $exec
+ %6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc
+ SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ %7:vgpr_32 = PHI %0, %bb.4
+ SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ INLINEASM &"", 1, implicit %5
+ S_BRANCH %bb.2
+
+ bb.6:
+ SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.7
+
+ bb.7:
+ S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+ S_BRANCH %bb.8
+
+ bb.8:
+ SI_RETURN
+
+...
+
+# The same testcase, except the relevant instruction is scalar and
+# could be legally sunk.
+---
+name: machinesink_loop_sgpr_out_of_divergent_loop
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ ; CHECK-LABEL: name: machinesink_loop_sgpr_out_of_divergent_loop
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr8, $sgpr9, $sgpr10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr9
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF [[COPY1]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
+ ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK killed [[SI_IF1]], [[SI_IF]], implicit-def dead $scc
+ ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.4
+ ; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def dead $scc
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[S_ADD_I32_]]
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.7(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: SI_LOOP [[SI_IF]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ ; CHECK-NEXT: S_BRANCH %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8:
+ ; CHECK-NEXT: SI_RETURN
+ bb.0:
+ liveins: $sgpr8, $sgpr9, $sgpr10
+
+ %0:sreg_32 = COPY $sgpr8
+ %1:sreg_32 = COPY $sgpr9
+ %2:sreg_32 = COPY $sgpr10
+
+ bb.1:
+ %3:sreg_32 = SI_IF %1, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ %4:sreg_32 = SI_IF %1, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ S_NOP 0
+
+ bb.4:
+ INLINEASM &"", 1 /* sideeffect attdialect */
+ %5:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc
+ %6:sreg_32 = SI_IF_BREAK killed %4, %3, implicit-def dead $scc
+ SI_LOOP %6, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ %7:vgpr_32 = PHI %0, %bb.4
+ SI_END_CF %6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ INLINEASM &"", 1, implicit %5
+ S_BRANCH %bb.2
+
+ bb.6:
+ SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.7
+
+ bb.7:
+ S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+ S_BRANCH %bb.8
+
+ bb.8:
+ SI_RETURN
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
index 16edbf1d833782..e354d928e73a11 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
+++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir
@@ -211,3 +211,277 @@ body: |
S_ENDPGM 0
...
+
+---
+name: machinesink_loop_vgpr_out_of_divergent_loop_postra
+tracksRegLiveness: true
+body: |
+ ; GFX10-LABEL: name: machinesink_loop_vgpr_out_of_divergent_loop_postra
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr8, $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr8, $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.4:
+ ; GFX10-NEXT: successors: %bb.5(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.5:
+ ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.6(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $vgpr1 = V_ADD_U32_e64 $sgpr8, $vgpr0, 0, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ ; GFX10-NEXT: renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+ ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.6:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0, $vgpr1
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ ; GFX10-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $vgpr1
+ ; GFX10-NEXT: S_BRANCH %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.7:
+ ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $vgpr0
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.8:
+ ; GFX10-NEXT: successors: %bb.9(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.9:
+ ; GFX10-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit undef $vcc
+ ; GFX10-NEXT: S_BRANCH %bb.10
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.10:
+ ; GFX10-NEXT: SI_RETURN
+ bb.0:
+ liveins: $sgpr8, $vgpr0, $vgpr1
+
+ bb.1:
+ liveins: $sgpr8, $vgpr0
+
+ $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+ S_CBRANCH_EXECZ %bb.6, implicit $exec
+
+ bb.9:
+ liveins: $sgpr4, $sgpr8, $vgpr0
+
+
+ bb.2:
+ liveins: $sgpr4, $sgpr8, $vgpr0
+
+ $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr8, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+
+
+ bb.4:
+ liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0
+
+ renamable $vgpr1 = V_ADD_U32_e64 $sgpr8, $vgpr0, 0, implicit $exec
+ renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+
+ $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+ S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ liveins: $sgpr4, $sgpr5, $sgpr8, $vgpr0, $vgpr1
+
+ $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $vgpr1
+ S_BRANCH %bb.2
+
+ bb.6:
+ liveins: $sgpr4, $sgpr8, $vgpr0
+
+ $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+ S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+ bb.10:
+
+ bb.7:
+ S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+ S_BRANCH %bb.8
+
+ bb.8:
+ SI_RETURN
+
+...
+
+---
+name: machinesink_loop_sgpr_out_of_divergent_loop_postra
+tracksRegLiveness: true
+body: |
+ ; GFX10-LABEL: name: machinesink_loop_sgpr_out_of_divergent_loop_postra
+ ; GFX10: bb.0:
+ ; GFX10-NEXT: successors: %bb.1(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr8, $sgpr9, $sgpr10
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.1:
+ ; GFX10-NEXT: successors: %bb.7(0x40000000), %bb.2(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr8, $sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.2:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.3:
+ ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX10-NEXT: renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.4
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.4:
+ ; GFX10-NEXT: successors: %bb.5(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.5:
+ ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.6(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: renamable $sgpr6 = S_ADD_I32 renamable $sgpr8, renamable $sgpr9, implicit-def dead $scc
+ ; GFX10-NEXT: renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ ; GFX10-NEXT: renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+ ; GFX10-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
+ ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec
+ ; GFX10-NEXT: S_BRANCH %bb.6
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.6:
+ ; GFX10-NEXT: successors: %bb.3(0x80000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr8, $sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ ; GFX10-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $sgpr6
+ ; GFX10-NEXT: S_BRANCH %bb.3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.7:
+ ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.8(0x40000000)
+ ; GFX10-NEXT: liveins: $sgpr4, $sgpr8, $sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+ ; GFX10-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.8:
+ ; GFX10-NEXT: successors: %bb.9(0x80000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.9:
+ ; GFX10-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit undef $vcc
+ ; GFX10-NEXT: S_BRANCH %bb.10
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: bb.10:
+ ; GFX10-NEXT: SI_RETURN
+ bb.0:
+ liveins: $sgpr8, $sgpr9, $sgpr10
+
+
+ bb.1:
+ liveins: $sgpr8, $sgpr9
+
+ $sgpr4 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr4 = S_XOR_B32 $exec_lo, killed renamable $sgpr4, implicit-def dead $scc
+ S_CBRANCH_EXECZ %bb.6, implicit $exec
+
+ bb.9:
+ liveins: $sgpr4, $sgpr8, $sgpr9
+
+
+ bb.2:
+ liveins: $sgpr4, $sgpr8, $sgpr9
+
+ $sgpr5 = S_AND_SAVEEXEC_B32 $sgpr9, implicit-def $exec, implicit-def $scc, implicit $exec
+ renamable $sgpr5 = S_XOR_B32 $exec_lo, killed renamable $sgpr5, implicit-def dead $scc
+ S_CBRANCH_EXECZ %bb.4, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+
+
+ bb.4:
+ liveins: $sgpr4, $sgpr5, $sgpr8, $sgpr9
+
+ renamable $sgpr6 = S_ADD_I32 renamable $sgpr8, renamable $sgpr9, implicit-def dead $scc
+ renamable $sgpr5 = S_AND_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ renamable $sgpr5 = S_OR_B32 killed renamable $sgpr5, renamable $sgpr4, implicit-def $scc
+ INLINEASM &"", 1 /* sideeffect attdialect */
+ $exec_lo = S_ANDN2_B32 $exec_lo, renamable $sgpr5, implicit-def $scc
+ S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ S_BRANCH %bb.5
+
+ bb.5:
+ liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr8, $sgpr9
+
+ $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr5, implicit-def $scc
+ INLINEASM &"", 1 /* sideeffect attdialect */, implicit killed renamable $sgpr6
+ S_BRANCH %bb.2
+
+ bb.6:
+ liveins: $sgpr4, $sgpr8, $sgpr9
+
+ $exec_lo = S_ANDN2_B32 $exec_lo, killed renamable $sgpr4, implicit-def $scc
+ S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+ bb.10:
+
+ bb.7:
+ S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
+ S_BRANCH %bb.8
+
+ bb.8:
+ SI_RETURN
+
+...
More information about the llvm-commits mailing list