[llvm] StructurizeCFG: Use poison instead of undef (PR #130459)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 8 19:45:31 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
There are a surprising number of codegen changes from this.
---
Patch is 247.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130459.diff
26 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (+16-18)
- (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+18-18)
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+99-114)
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll (+23-23)
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+132-132)
- (modified) llvm/test/CodeGen/AMDGPU/loop_break.ll (+101-99)
- (modified) llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll (+5-6)
- (modified) llvm/test/CodeGen/AMDGPU/multilevel-break.ll (+59-61)
- (modified) llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll (+12-11)
- (modified) llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll (+4-3)
- (modified) llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll (+6-7)
- (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+44-34)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll (+4-5)
- (modified) llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll (+2-2)
- (modified) llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll (+9-9)
- (modified) llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll (+2-3)
- (modified) llvm/test/Transforms/StructurizeCFG/loop-break-phi.ll (+13-14)
- (modified) llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll (+10-11)
- (modified) llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll (+4-5)
- (modified) llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll (+4-5)
- (modified) llvm/test/Transforms/StructurizeCFG/workarounds/needs-fix-reducible.ll (+5-9)
- (modified) llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll (+9-21)
- (modified) llvm/test/Transforms/StructurizeCFG/workarounds/needs-unified-loop-exits.ll (+1-2)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index b1f742b838f2a..28683afe0acd7 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -858,10 +858,10 @@ void StructurizeCFG::setPhiValues() {
PhiMap &Map = DeletedPhis[To];
SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
for (const auto &[Phi, Incoming] : Map) {
- Value *Undef = UndefValue::get(Phi->getType());
+ Value *Poison = PoisonValue::get(Phi->getType());
Updater.Initialize(Phi->getType(), "");
- Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
- Updater.AddAvailableValue(To, Undef);
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
+ Updater.AddAvailableValue(To, Poison);
// Use leader phi's incoming if there is.
auto LeaderIt = PhiClasses.findLeader(Phi);
@@ -890,7 +890,7 @@ void StructurizeCFG::setPhiValues() {
if (Updater.HasValueForBlock(UB))
continue;
- Updater.AddAvailableValue(UB, Undef);
+ Updater.AddAvailableValue(UB, Poison);
}
for (BasicBlock *FI : From)
@@ -1181,9 +1181,9 @@ void StructurizeCFG::rebuildSSA() {
continue;
if (!Initialized) {
- Value *Undef = UndefValue::get(I.getType());
+ Value *Poison = PoisonValue::get(I.getType());
Updater.Initialize(I.getType(), "");
- Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
Updater.AddAvailableValue(BB, &I);
Initialized = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index cc768a2cdf61f..5fa991cd27785 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -199,33 +199,31 @@ define amdgpu_kernel void @break_loop(i32 %arg) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_subrev_u32_e32 v0, s0, v0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
-; CHECK-NEXT: s_branch .LBB5_3
-; CHECK-NEXT: .LBB5_1: ; %bb4
-; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
-; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
-; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
-; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
-; CHECK-NEXT: .LBB5_2: ; %Flow
-; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
+; CHECK-NEXT: s_branch .LBB5_2
+; CHECK-NEXT: .LBB5_1: ; %Flow
+; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: s_and_b64 s[4:5], exec, s[2:3]
; CHECK-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; CHECK-NEXT: s_cbranch_execz .LBB5_5
-; CHECK-NEXT: .LBB5_3: ; %bb1
+; CHECK-NEXT: s_cbranch_execz .LBB5_4
+; CHECK-NEXT: .LBB5_2: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; CHECK-NEXT: s_and_b64 s[4:5], exec, -1
; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
-; CHECK-NEXT: s_cbranch_vccz .LBB5_1
-; CHECK-NEXT: ; %bb.4: ; in Loop: Header=BB5_3 Depth=1
-; CHECK-NEXT: ; implicit-def: $vgpr1
-; CHECK-NEXT: s_branch .LBB5_2
-; CHECK-NEXT: .LBB5_5: ; %bb9
+; CHECK-NEXT: s_cbranch_vccnz .LBB5_1
+; CHECK-NEXT: ; %bb.3: ; %bb4
+; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
+; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
+; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
+; CHECK-NEXT: s_branch .LBB5_1
+; CHECK-NEXT: .LBB5_4: ; %bb9
; CHECK-NEXT: s_endpgm
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index e70e34fa0ba5d..3116b5d59a097 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -646,13 +646,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: v_add_f32_e32 v9, v9, v15
; GFX908-NEXT: v_add_f32_e32 v10, v10, v12
; GFX908-NEXT: v_add_f32_e32 v11, v11, v13
-; GFX908-NEXT: s_mov_b64 s[20:21], -1
; GFX908-NEXT: s_branch .LBB3_4
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
+; GFX908-NEXT: s_mov_b64 s[20:21], -1
; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX908-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard
@@ -798,13 +798,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[26:27]
; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17]
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
-; GFX90A-NEXT: s_mov_b64 s[20:21], -1
; GFX90A-NEXT: s_branch .LBB3_4
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
+; GFX90A-NEXT: s_mov_b64 s[20:21], -1
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard
diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index 36fa7b97b3c77..a6af63b816573 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s12, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s52, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB0_8
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_9
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
; CHECK-NEXT: s_cmp_eq_u32 s54, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
@@ -26,36 +26,36 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: s_mov_b32 s48, 0
-; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
-; CHECK-NEXT: s_cbranch_vccz .LBB0_6
-; CHECK-NEXT: s_branch .LBB0_7
+; CHECK-NEXT: s_mov_b32 s18, 0
+; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_mov_b32 s14, s12
; CHECK-NEXT: s_mov_b32 s15, s12
; CHECK-NEXT: s_mov_b32 s13, s12
; CHECK-NEXT: s_mov_b64 s[50:51], s[14:15]
; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13]
-; CHECK-NEXT: s_branch .LBB0_7
+; CHECK-NEXT: s_branch .LBB0_8
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0
-; CHECK-NEXT: s_mov_b32 s48, 1.0
+; CHECK-NEXT: s_mov_b32 s18, 1.0
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
+; CHECK-NEXT: .LBB0_6: ; %Flow
+; CHECK-NEXT: s_mov_b32 s48, 1.0
+; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_mov_b32 s49, s48
; CHECK-NEXT: s_mov_b32 s50, s48
; CHECK-NEXT: s_mov_b32 s51, s48
-; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
-; CHECK-NEXT: s_cbranch_vccnz .LBB0_7
-; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_8
+; CHECK-NEXT: ; %bb.7: ; %if.end273.i.i
; CHECK-NEXT: s_add_u32 s12, s8, 40
; CHECK-NEXT: s_addc_u32 s13, s9, 0
-; CHECK-NEXT: s_getpc_b64 s[18:19]
-; CHECK-NEXT: s_add_u32 s18, s18, _Z3dotDv3_fS_@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s19, s19, _Z3dotDv3_fS_@gotpcrel32@hi+12
+; CHECK-NEXT: s_getpc_b64 s[20:21]
+; CHECK-NEXT: s_add_u32 s20, s20, _Z3dotDv3_fS_@gotpcrel32@lo+4
+; CHECK-NEXT: s_addc_u32 s21, s21, _Z3dotDv3_fS_@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
+; CHECK-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
-; CHECK-NEXT: v_add_f32_e64 v1, s17, s48
+; CHECK-NEXT: v_add_f32_e64 v1, s17, s18
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT: s_mov_b32 s12, s14
@@ -67,18 +67,18 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s14, s16
; CHECK-NEXT: s_mov_b32 s48, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21]
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
; CHECK-NEXT: s_mov_b32 s49, s48
; CHECK-NEXT: s_mov_b32 s50, s48
; CHECK-NEXT: s_mov_b32 s51, s48
-; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
+; CHECK-NEXT: .LBB0_8: ; %if.end294.i.i
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
+; CHECK-NEXT: .LBB0_9: ; %kernel_direct_lighting.exit
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
; CHECK-NEXT: v_mov_b32_e32 v0, s48
; CHECK-NEXT: v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index e43a021802644..266216c4d8b50 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -13,16 +13,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr17, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
- ; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
- ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr33, 0, implicit-def $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
+ ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 0, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_XOR_B64 renamable $sgpr12_sgpr13, -1, implicit-def dead $scc
- ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr33, 8, implicit-def $scc
+ ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 8, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_XOR_B64 killed renamable $sgpr18_sgpr19, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec
@@ -33,7 +33,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1.bb103:
; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.2(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -41,10 +41,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr44, $sgpr45, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr46, $sgpr47, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
@@ -54,15 +52,17 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3.Flow17:
; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr17 = V_MOV_B32_e32 0, implicit $exec
; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.57, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.4.bb15:
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec
; GFX90A-NEXT: renamable $vgpr4 = COPY renamable $sgpr25, implicit $exec
@@ -79,7 +79,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
@@ -107,24 +107,23 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
- ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.6.Flow20:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/130459
More information about the llvm-commits
mailing list