[llvm] [CodeGen] Do not remove IMPLICIT_DEF unless all uses have undef flag added (PR #188133)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 05:54:16 PDT 2026
https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/188133
>From af65e1932d4b5e2e8cdde6b92787e949dc68d7ce Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Mon, 23 Mar 2026 16:24:34 -0500
Subject: [PATCH 01/11] Do not remove IMPLICIT_DEF unless all uses have undef
flag added
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 17 +-
...der-no-live-segment-at-def-implicit-def.ll | 26 +-
.../branch-folding-implicit-def-subreg.ll | 112 ++++----
llvm/test/CodeGen/AMDGPU/call-constexpr.ll | 2 +
llvm/test/CodeGen/AMDGPU/call-skip.ll | 14 +-
.../AMDGPU/cross-block-use-is-not-abi-copy.ll | 14 +-
.../AMDGPU/global_atomics_scan_fadd.ll | 260 ++++++++++++++----
.../AMDGPU/global_atomics_scan_fmax.ll | 156 ++++++++---
.../AMDGPU/global_atomics_scan_fmin.ll | 156 ++++++++---
.../AMDGPU/global_atomics_scan_fsub.ll | 260 ++++++++++++++----
.../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 6 +-
...ne-sink-temporal-divergence-swdev407790.ll | 12 +
llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll | 1 +
.../CodeGen/AMDGPU/multi-use-implicit-def.mir | 49 ++++
llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll | 1 +
.../AMDGPU/promote-constOffset-to-imm.ll | 5 +
...tack-pointer-offset-relative-frameindex.ll | 4 +
.../AMDGPU/tuple-allocation-failure.ll | 70 ++---
.../AMDGPU/undef-handling-crash-in-ra.ll | 1 +
llvm/test/CodeGen/PowerPC/p10-spill-crun.ll | 2 +
llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll | 14 +-
.../CodeGen/Thumb2/2010-02-11-phi-cycle.ll | 2 +
llvm/test/CodeGen/X86/issue76416.ll | 1 +
.../X86/machine-trace-metrics-crash.ll | 1 +
.../CodeGen/X86/multi-use-implicit-def.mir | 15 +
llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 6 +-
.../X86/regalloc-advanced-split-cost.ll | 2 +-
.../CodeGen/X86/statepoint-two-results.ll | 1 +
.../CodeGen/X86/statepoint-vreg-invoke.ll | 5 +
.../X86/tail-dup-merge-loop-headers.ll | 2 +
30 files changed, 903 insertions(+), 314 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
create mode 100644 llvm/test/CodeGen/X86/multi-use-implicit-def.mir
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 6b58b6574f5bd..a05d520678cc0 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -105,8 +105,9 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// Look for the first instruction to use or define an alias.
MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
- bool Found = false;
+ bool ImplicitDefIsDead = false;
for (++UserMI; UserMI != UserE; ++UserMI) {
+ bool DefinesReg=false;
for (MachineOperand &MO : UserMI->operands()) {
if (!MO.isReg())
continue;
@@ -114,16 +115,20 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
continue;
// UserMI uses or redefines Reg. Set <undef> flags on all uses.
- Found = true;
- if (MO.isUse())
+ if (!ImplicitDefIsDead && MO.isUse() )
MO.setIsUndef();
+ if (MO.isDef())
+ DefinesReg = true;
}
- if (Found)
+ if (DefinesReg) {
+ ImplicitDefIsDead = true;
break;
+ }
}
- // If we found the using MI, we can erase the IMPLICIT_DEF.
- if (Found) {
+ // If every use now carries an undef flag (i.e. we found a redefining MI or
+ // the block has no successors), we can erase the IMPLICIT_DEF.
+ if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
MI->eraseFromParent();
return;
diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index 3bab9d0e80546..554d7eb2cfbdb 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -21,12 +21,12 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i
; CHECK-NEXT: s_cmp_lg_u32 s55, 0
-; CHECK-NEXT: s_mov_b32 s17, 0
-; CHECK-NEXT: s_cselect_b32 s12, -1, 0
-; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
+; CHECK-NEXT: s_mov_b32 s13, s15
+; CHECK-NEXT: s_cselect_b32 s17, -1, 0
+; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s17
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: s_mov_b32 s18, 0
+; CHECK-NEXT: s_mov_b32 s15, 0
; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_mov_b32 s14, s12
@@ -36,36 +36,36 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13]
; CHECK-NEXT: s_branch .LBB0_8
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
-; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0
-; CHECK-NEXT: s_mov_b32 s18, 1.0
-; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
+; CHECK-NEXT: v_cmp_lt_f32_e64 s17, s53, 0
+; CHECK-NEXT: s_mov_b32 s15, 1.0
+; CHECK-NEXT: s_mov_b32 s12, 0x7fc00000
; CHECK-NEXT: .LBB0_6: ; %Flow
; CHECK-NEXT: s_mov_b32 s48, 1.0
-; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
+; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s17
; CHECK-NEXT: s_mov_b32 s49, s48
; CHECK-NEXT: s_mov_b32 s50, s48
; CHECK-NEXT: s_mov_b32 s51, s48
; CHECK-NEXT: s_cbranch_vccnz .LBB0_8
; CHECK-NEXT: ; %bb.7: ; %if.end273.i.i
-; CHECK-NEXT: s_add_u32 s12, s8, 40
-; CHECK-NEXT: s_addc_u32 s13, s9, 0
+; CHECK-NEXT: s_add_u32 s18, s8, 40
+; CHECK-NEXT: s_addc_u32 s19, s9, 0
; CHECK-NEXT: s_getpc_b64 s[20:21]
; CHECK-NEXT: s_add_u32 s20, s20, _Z3dotDv3_fS_ at gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s21, s21, _Z3dotDv3_fS_ at gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
-; CHECK-NEXT: v_add_f32_e64 v1, s17, s18
+; CHECK-NEXT: v_add_f32_e64 v1, s12, s15
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
-; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
; CHECK-NEXT: s_mov_b32 s12, s14
; CHECK-NEXT: v_or3_b32 v31, v0, v3, v2
; CHECK-NEXT: v_mov_b32_e32 v0, v1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mov_b32_e32 v2, 0
-; CHECK-NEXT: s_mov_b32 s13, s15
; CHECK-NEXT: s_mov_b32 s14, s16
; CHECK-NEXT: s_mov_b32 s48, 0
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21]
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index e0b83eeaa0faa..bc88acf521bd1 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -12,11 +12,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc
; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr17, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: renamable $sgpr17 = COPY $sgpr15
; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg3.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr15 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4)
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 0, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 -1
@@ -40,7 +41,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF implicit-def $vgpr16
; GFX90A-NEXT: renamable $vgpr3 = IMPLICIT_DEF implicit-def $vgpr2
@@ -51,7 +52,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3.Flow17:
; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr6 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def dead $scc
@@ -59,7 +60,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.4.bb15:
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
@@ -74,7 +75,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.5:
; GFX90A-NEXT: successors: %bb.6(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
@@ -109,7 +110,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.6.Flow20:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr26 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr28 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -122,7 +123,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.7.Flow19:
; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0
; GFX90A-NEXT: $sgpr18_sgpr19 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -130,7 +131,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.8.Flow32:
; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -139,7 +140,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.9.bb89:
; GFX90A-NEXT: successors: %bb.10(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -147,7 +148,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.10.Flow33:
; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -156,7 +157,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.11.bb84:
; GFX90A-NEXT: successors: %bb.12(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -164,7 +165,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.12.Flow34:
; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -173,7 +174,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.13.bb79:
; GFX90A-NEXT: successors: %bb.14(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -181,7 +182,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.14.Flow35:
; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -190,14 +191,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.15.bb72:
; GFX90A-NEXT: successors: %bb.16(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f2, target-flags(amdgpu-gotprel32-hi) @f2, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_LOAD_DWORDX2_IMM killed renamable $sgpr12_sgpr13, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+ ; GFX90A-NEXT: dead $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: $sgpr12 = COPY killed renamable $sgpr14
- ; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr15
+ ; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr17
; GFX90A-NEXT: $sgpr14 = COPY killed renamable $sgpr16
; GFX90A-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr18_sgpr19, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
@@ -358,7 +360,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.35.bb20:
; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23)
; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
@@ -398,14 +400,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.36.Flow21:
; GFX90A-NEXT: successors: %bb.6(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
; GFX90A-NEXT: S_BRANCH %bb.6
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.37.bb27:
; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30)
; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
@@ -437,7 +439,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.38.Flow22:
; GFX90A-NEXT: successors: %bb.36(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -458,7 +460,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.39.bb34:
; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37)
; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
@@ -489,7 +491,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.40.Flow23:
; GFX90A-NEXT: successors: %bb.38(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -509,7 +511,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.41.bb41:
; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 0, implicit $exec
@@ -539,7 +541,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.42.Flow24:
; GFX90A-NEXT: successors: %bb.40(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -558,7 +560,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.43.bb55:
; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr20, 16, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
@@ -570,7 +572,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.44:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
; GFX90A-NEXT: renamable $vgpr12_vgpr13 = IMPLICIT_DEF
@@ -590,7 +592,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.45.Flow26:
; GFX90A-NEXT: successors: %bb.47(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
@@ -606,7 +608,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.46.bb48:
; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
@@ -637,7 +639,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.47.Flow25:
; GFX90A-NEXT: successors: %bb.42(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr44_sgpr45, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -655,21 +657,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.48.bb63:
; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000)
- ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
+ ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.49:
; GFX90A-NEXT: successors: %bb.44(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1
; GFX90A-NEXT: S_BRANCH %bb.44
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.50.bb68:
; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr6, implicit $exec
; GFX90A-NEXT: renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -678,7 +680,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.51:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
@@ -698,16 +700,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.52.bb80:
; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: dead renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc
+ ; GFX90A-NEXT: dead renamable $sgpr15 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc
; GFX90A-NEXT: renamable $vgpr10 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
; GFX90A-NEXT: renamable $vgpr11, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: S_CBRANCH_SCC0 %bb.59, implicit killed $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.53:
; GFX90A-NEXT: successors: %bb.61(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1
@@ -726,7 +728,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.54.bb73:
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
; GFX90A-NEXT: renamable $vgpr8 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
@@ -751,14 +753,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.55.Flow29:
; GFX90A-NEXT: successors: %bb.45(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr62_sgpr63, implicit-def $scc
; GFX90A-NEXT: S_BRANCH %bb.45
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.56.bb90:
; GFX90A-NEXT: successors: %bb.60(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -778,7 +780,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.57:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr24 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr20 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -823,7 +825,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3)
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec
; GFX90A-NEXT: renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3)
- ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr17, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3)
; GFX90A-NEXT: renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec
; GFX90A-NEXT: renamable $vgpr32_vgpr33 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.5, addrspace 3)
@@ -832,7 +834,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.59.bb85:
; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr12 = V_OR_B32_e32 1, $vgpr10, implicit $exec
; GFX90A-NEXT: renamable $vgpr13 = COPY renamable $vgpr11, implicit $exec
@@ -854,14 +856,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.60.Flow31:
; GFX90A-NEXT: successors: %bb.61(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.61.Flow30:
; GFX90A-NEXT: successors: %bb.55(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
@@ -873,7 +875,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.62.bb140:
; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -881,14 +883,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.63.Flow13:
; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.64.bb159:
; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr6, implicit $exec
; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -897,21 +899,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.65.Flow10:
; GFX90A-NEXT: successors: %bb.66(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.66.Flow14:
; GFX90A-NEXT: successors: %bb.8(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec
; GFX90A-NEXT: S_BRANCH %bb.8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.67.bb161:
; GFX90A-NEXT: successors: %bb.65(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr27, killed $vgpr29, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr33, implicit $exec
@@ -930,7 +932,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.68.bb174:
; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $agpr0 = COPY killed renamable $vgpr14, implicit $exec
; GFX90A-NEXT: renamable $vgpr34 = V_OR_B32_e32 1, $vgpr32, implicit $exec
@@ -947,14 +949,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.69.Flow:
; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.70.bb186:
; GFX90A-NEXT: successors: %bb.71(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
@@ -983,7 +985,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.71.Flow9:
; GFX90A-NEXT: successors: %bb.63(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr24_sgpr25 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vgpr14 = COPY killed renamable $agpr0, implicit $exec
@@ -991,7 +993,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.72.bb196:
; GFX90A-NEXT: successors: %bb.69(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 $vgpr14, killed $vgpr24, implicit $exec
; GFX90A-NEXT: renamable $vgpr22 = V_OR_B32_e32 killed $vgpr2, killed $vgpr22, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
index fe0b0188d2d37..697576edef12e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
@@ -305,6 +305,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v
; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
; SDAG-NEXT: v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT: ; implicit-def: $sgpr15
; SDAG-NEXT: s_mov_b32 s14, s16
; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
; SDAG-NEXT: v_add_f32_e32 v0, 1.0, v0
@@ -330,6 +331,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v
; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
; GISEL-NEXT: v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT: ; implicit-def: $sgpr15
; GISEL-NEXT: s_mov_b32 s14, s16
; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GISEL-NEXT: v_add_f32_e32 v0, 1.0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
index e2ca278d687be..ada817b402444 100644
--- a/llvm/test/CodeGen/AMDGPU/call-skip.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -5,6 +5,10 @@
; A call should be skipped if all lanes are zero, since we don't know
; what side effects should be avoided inside the call.
define hidden void @func() #1 {
+; GCN-LABEL: func:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
ret void
}
@@ -85,19 +89,20 @@ define amdgpu_kernel void @if_call_kernel() #0 {
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; SDAG-NEXT: s_mov_b32 s32, 0
; SDAG-NEXT: s_mov_b32 flat_scratch_lo, s13
-; SDAG-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT: s_and_saveexec_b64 s[18:19], vcc
; SDAG-NEXT: s_cbranch_execz .LBB3_2
; SDAG-NEXT: ; %bb.1: ; %call
; SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT: s_mov_b32 s13, s15
; SDAG-NEXT: s_getpc_b64 s[18:19]
; SDAG-NEXT: s_add_u32 s18, s18, func@rel32@lo+4
; SDAG-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12
; SDAG-NEXT: v_or_b32_e32 v31, v0, v2
; SDAG-NEXT: s_mov_b32 s12, s14
-; SDAG-NEXT: s_mov_b32 s13, s15
; SDAG-NEXT: s_mov_b32 s14, s16
+; SDAG-NEXT: ; implicit-def: $sgpr15
; SDAG-NEXT: s_swappc_b64 s[30:31], s[18:19]
; SDAG-NEXT: .LBB3_2: ; %end
; SDAG-NEXT: s_endpgm
@@ -111,19 +116,20 @@ define amdgpu_kernel void @if_call_kernel() #0 {
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT: s_and_saveexec_b64 s[18:19], vcc
; GISEL-NEXT: s_cbranch_execz .LBB3_2
; GISEL-NEXT: ; %bb.1: ; %call
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
+; GISEL-NEXT: s_mov_b32 s13, s15
; GISEL-NEXT: s_getpc_b64 s[18:19]
; GISEL-NEXT: s_add_u32 s18, s18, func@rel32@lo+4
; GISEL-NEXT: s_addc_u32 s19, s19, func@rel32@hi+12
; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
; GISEL-NEXT: s_mov_b32 s12, s14
-; GISEL-NEXT: s_mov_b32 s13, s15
; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: ; implicit-def: $sgpr15
; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GISEL-NEXT: .LBB3_2: ; %end
; GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 38c20c7cf62d6..45836ff81f774 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -184,21 +184,22 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_bitcmp1_b32 s12, 0
-; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GCN-NEXT: s_and_b64 vcc, exec, s[12:13]
+; GCN-NEXT: s_cselect_b64 s[18:19], -1, 0
+; GCN-NEXT: s_and_b64 vcc, exec, s[18:19]
; GCN-NEXT: s_cbranch_vccnz .LBB4_2
; GCN-NEXT: ; %bb.1: ; %if.else
; GCN-NEXT: s_add_u32 s8, s8, 8
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_getpc_b64 s[18:19]
; GCN-NEXT: s_add_u32 s18, s18, func_v3i16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s19, s19, func_v3i16@rel32@hi+12
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
; GCN-NEXT: s_mov_b32 s12, s14
-; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s14, s16
+; GCN-NEXT: ; implicit-def: $sgpr15
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_branch .LBB4_3
; GCN-NEXT: .LBB4_2:
@@ -235,21 +236,22 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_bitcmp1_b32 s12, 0
-; GCN-NEXT: s_cselect_b64 s[12:13], -1, 0
-; GCN-NEXT: s_and_b64 vcc, exec, s[12:13]
+; GCN-NEXT: s_cselect_b64 s[18:19], -1, 0
+; GCN-NEXT: s_and_b64 vcc, exec, s[18:19]
; GCN-NEXT: s_cbranch_vccnz .LBB5_2
; GCN-NEXT: ; %bb.1: ; %if.else
; GCN-NEXT: s_add_u32 s8, s8, 8
; GCN-NEXT: s_addc_u32 s9, s9, 0
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_getpc_b64 s[18:19]
; GCN-NEXT: s_add_u32 s18, s18, func_v3f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s19, s19, func_v3f16@rel32@hi+12
; GCN-NEXT: v_or3_b32 v31, v0, v1, v2
; GCN-NEXT: s_mov_b32 s12, s14
-; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s14, s16
+; GCN-NEXT: ; implicit-def: $sgpr15
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_branch .LBB5_3
; GCN-NEXT: .LBB5_2:
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 103ca48a7dc5f..d6e11e6af6d84 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -386,6 +386,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -462,6 +463,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -528,6 +530,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -594,6 +597,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -649,6 +653,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1
@@ -688,16 +693,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop
@@ -751,6 +758,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -803,6 +811,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -885,6 +894,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -962,6 +972,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -1023,6 +1034,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -1078,16 +1090,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -1599,6 +1613,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -1675,6 +1690,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -1741,6 +1757,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -1807,6 +1824,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -1862,6 +1880,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1
@@ -1901,16 +1920,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop
@@ -1964,6 +1985,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -2016,6 +2038,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2098,6 +2121,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2175,6 +2199,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -2236,6 +2261,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2291,16 +2317,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -2872,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -2948,6 +2977,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -3014,6 +3044,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -3080,6 +3111,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -3135,6 +3167,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1
@@ -3174,16 +3207,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop
@@ -3237,6 +3272,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -3289,6 +3325,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -3371,6 +3408,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -3448,6 +3486,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -3509,6 +3548,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -3564,16 +3604,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -3641,6 +3683,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -3717,6 +3760,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -3783,6 +3827,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -3849,6 +3894,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -3904,6 +3950,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v1, 1
@@ -3943,16 +3990,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB6_1: ; %ComputeLoop
@@ -4006,6 +4055,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -4058,6 +4108,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -4140,6 +4191,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4217,6 +4269,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -4278,6 +4331,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4333,16 +4387,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -4913,6 +4969,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -4989,6 +5046,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -5055,6 +5113,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -5121,6 +5180,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -5176,6 +5236,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1
@@ -5228,16 +5289,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB8_1: ; %ComputeLoop
@@ -5304,6 +5367,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -5356,6 +5420,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -5438,6 +5503,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5515,6 +5581,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5576,6 +5643,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5644,16 +5712,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -6165,6 +6235,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -6246,6 +6317,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -6315,6 +6387,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -6384,6 +6457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -6442,6 +6516,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -6497,16 +6572,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -6575,6 +6652,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -6630,6 +6708,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6729,6 +6808,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6818,6 +6898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -6891,6 +6972,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6973,16 +7055,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -7592,6 +7676,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -7673,6 +7758,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -7742,6 +7828,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -7811,6 +7898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -7869,6 +7957,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -7924,16 +8013,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -8002,6 +8093,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -8057,6 +8149,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8156,6 +8249,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8245,6 +8339,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8318,6 +8413,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8400,16 +8496,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -9019,6 +9117,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -9100,6 +9199,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -9169,6 +9269,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -9238,6 +9339,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -9296,6 +9398,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -9351,16 +9454,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -9429,6 +9534,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -9484,6 +9590,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -9583,6 +9690,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -9672,6 +9780,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -9745,6 +9854,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -9827,16 +9937,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -9928,6 +10040,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -10009,6 +10122,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -10078,6 +10192,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -10147,6 +10262,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -10205,6 +10321,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -10260,16 +10377,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -10338,6 +10457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -10393,6 +10513,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -10492,6 +10613,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -10581,6 +10703,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -10654,6 +10777,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -10736,16 +10860,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -11355,6 +11481,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -11436,6 +11563,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -11505,6 +11633,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -11574,6 +11703,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -11632,6 +11762,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -11687,16 +11818,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -11765,6 +11898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -11820,6 +11954,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -11919,6 +12054,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -12008,6 +12144,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -12081,6 +12218,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -12163,16 +12301,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 2160976599dd7..957ff4766e709 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop
@@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -797,6 +806,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -1578,6 +1596,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop
@@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -2653,16 +2686,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop
@@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -2772,6 +2808,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
@@ -3690,6 +3735,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
@@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4047,6 +4098,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
@@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
@@ -5001,6 +5061,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -5146,6 +5209,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -6160,6 +6232,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
@@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
@@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -6467,6 +6545,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 029fb9c118344..97659df4f6496 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop
@@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -797,6 +806,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -1578,6 +1596,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop
@@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v1, 0x7fc00000
@@ -2653,16 +2686,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v1, 0x7fc00000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop
@@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -2772,6 +2808,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
@@ -3690,6 +3735,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
@@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4047,6 +4098,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
@@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
@@ -5001,6 +5061,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -5146,6 +5209,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -6160,6 +6232,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v2, 0
@@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v2, 0
@@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_mov_b32_e32 v5, 0x7ff80000
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -6467,6 +6545,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index 3250d95bb0b7d..68e87b16c66fe 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -446,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -522,6 +523,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -588,6 +590,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -654,6 +657,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -709,6 +713,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1
@@ -761,16 +766,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB1_1: ; %ComputeLoop
@@ -837,6 +844,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -889,6 +897,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -971,6 +980,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -1048,6 +1058,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -1109,6 +1120,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -1177,16 +1189,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -1771,6 +1785,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -1847,6 +1862,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -1913,6 +1929,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -1979,6 +1996,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -2034,6 +2052,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1
@@ -2086,16 +2105,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB3_1: ; %ComputeLoop
@@ -2162,6 +2183,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -2214,6 +2236,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2296,6 +2319,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2373,6 +2397,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -2434,6 +2459,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -2502,16 +2528,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -3096,6 +3124,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -3172,6 +3201,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -3238,6 +3268,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -3304,6 +3335,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -3359,6 +3391,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1
@@ -3411,16 +3444,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB5_1: ; %ComputeLoop
@@ -3487,6 +3522,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -3539,6 +3575,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -3621,6 +3658,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -3698,6 +3736,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -3759,6 +3798,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -3827,16 +3867,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -3917,6 +3959,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -3993,6 +4036,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -4059,6 +4103,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -4125,6 +4170,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -4180,6 +4226,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1
@@ -4232,16 +4279,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB6_1: ; %ComputeLoop
@@ -4308,6 +4357,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -4360,6 +4410,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -4442,6 +4493,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4519,6 +4571,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -4580,6 +4633,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -4648,16 +4702,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -5241,6 +5297,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -5317,6 +5374,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_mov_b64 s[0:1], exec
@@ -5383,6 +5441,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_bfrev_b32_e32 v2, 1
@@ -5449,6 +5508,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_bfrev_b32_e32 v2, 1
@@ -5504,6 +5564,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_bfrev_b32_e32 v2, 1
@@ -5556,16 +5617,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_bfrev_b32_e32 v2, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
; GFX1132-NEXT: .LBB8_1: ; %ComputeLoop
@@ -5632,6 +5695,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -5684,6 +5748,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -5766,6 +5831,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5843,6 +5909,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -5904,6 +5971,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -5972,16 +6040,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v1, 1
; GFX1132-DPP-NEXT: v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -6493,6 +6563,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -6574,6 +6645,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -6643,6 +6715,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -6712,6 +6785,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -6770,6 +6844,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -6825,16 +6900,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -6903,6 +6980,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -6958,6 +7036,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -7057,6 +7136,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -7146,6 +7226,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -7219,6 +7300,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -7301,16 +7383,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -7919,6 +8003,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -8000,6 +8085,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -8069,6 +8155,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -8138,6 +8225,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -8196,6 +8284,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -8251,16 +8340,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -8329,6 +8420,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -8384,6 +8476,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8483,6 +8576,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8572,6 +8666,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -8645,6 +8740,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -8727,16 +8823,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -9346,6 +9444,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -9427,6 +9526,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -9496,6 +9596,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -9565,6 +9666,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -9623,6 +9725,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -9678,16 +9781,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -9756,6 +9861,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -9811,6 +9917,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -9910,6 +10017,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -9999,6 +10107,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -10072,6 +10181,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -10154,16 +10264,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.double.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.double.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -10255,6 +10367,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -10336,6 +10449,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -10405,6 +10519,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -10474,6 +10589,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -10532,6 +10648,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -10587,16 +10704,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -10665,6 +10784,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -10720,6 +10840,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -10819,6 +10940,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -10908,6 +11030,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -10981,6 +11104,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -11063,16 +11187,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
@@ -11681,6 +11807,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -11762,6 +11889,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: v_mov_b32_e32 v4, 0
@@ -11831,6 +11959,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-NEXT: s_mov_b32 s32, 0
+; GFX1064-NEXT: ; implicit-def: $sgpr15
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-NEXT: v_mov_b32_e32 v4, 0
@@ -11900,6 +12029,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-NEXT: s_mov_b32 s32, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr15
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-NEXT: v_mov_b32_e32 v4, 0
@@ -11958,6 +12088,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-NEXT: s_mov_b32 s32, 0
+; GFX1164-NEXT: ; implicit-def: $sgpr15
; GFX1164-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-NEXT: v_mov_b32_e32 v4, 0
@@ -12013,16 +12144,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-NEXT: s_getpc_b64 s[4:5]
; GFX1132-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-NEXT: s_mov_b32 s16, s15
+; GFX1132-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b32 s12, s13
-; GFX1132-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-NEXT: s_mov_b32 s13, s14
-; GFX1132-NEXT: s_mov_b32 s14, s15
+; GFX1132-NEXT: s_mov_b32 s14, s16
; GFX1132-NEXT: s_mov_b32 s32, 0
+; GFX1132-NEXT: ; implicit-def: $sgpr15
; GFX1132-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-NEXT: v_mov_b32_e32 v4, 0
; GFX1132-NEXT: v_bfrev_b32_e32 v5, 1
; GFX1132-NEXT: s_mov_b32 s0, exec_lo
@@ -12091,6 +12224,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT: ; implicit-def: $sgpr15
; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49]
@@ -12146,6 +12280,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-DPP-NEXT: s_mov_b32 s32, 0
+; GFX9-DPP-NEXT: ; implicit-def: $sgpr15
; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -12245,6 +12380,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1064-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1064-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -12334,6 +12470,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX1032-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1032-DPP-NEXT: s_or_saveexec_b32 s0, -1
@@ -12407,6 +12544,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1164-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1
@@ -12489,16 +12627,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
; GFX1132-DPP-NEXT: s_getpc_b64 s[4:5]
; GFX1132-DPP-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4
; GFX1132-DPP-NEXT: s_addc_u32 s5, s5, div.float.value@gotpcrel32@hi+12
+; GFX1132-DPP-NEXT: s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT: s_load_b64 s[18:19], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT: s_load_b64 s[16:17], s[4:5], 0x0
; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX1132-DPP-NEXT: s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT: s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT: s_mov_b32 s14, s16
; GFX1132-DPP-NEXT: s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT: ; implicit-def: $sgpr15
; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1
; GFX1132-DPP-NEXT: v_bfrev_b32_e32 v3, 1
; GFX1132-DPP-NEXT: v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index d895a75de6e81..880cfbf12b931 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -60,8 +60,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_load_b32 s24, s[18:19], 0x24
; GFX11-NEXT: s_mov_b32 s12, s13
; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX11-NEXT: s_mov_b32 s20, 0
; GFX11-NEXT: s_mov_b32 s0, -1
@@ -86,6 +86,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
+; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_mov_b32 s13, s14
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_mov_b32 s26, s14
@@ -192,7 +193,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
; GFX11-NEXT: s_mov_b32 s13, s14
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT: s_mov_b32 s14, s15
+; GFX11-NEXT: s_mov_b32 s14, s18
+; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_or_b32 s20, s20, exec_lo
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 0e250baa86090..951bf1e6eeac8 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -109,6 +109,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
; CHECK-NEXT: v_mov_b32_e32 v1, 12
@@ -194,6 +195,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v57
@@ -220,6 +222,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v59, 1, v57
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v59
@@ -246,6 +249,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v59, 2, v57
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v59
@@ -272,6 +276,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v57
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v58
@@ -322,6 +327,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v57
@@ -356,6 +362,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_mov_b32 s4, exec_lo
@@ -381,6 +388,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41
; CHECK-NEXT: v_add_co_ci_u32_e64 v0, null, 0, v1, vcc_lo
@@ -441,6 +449,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v73
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 2, v73
; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0
@@ -501,6 +510,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_branch .LBB0_27
; CHECK-NEXT: .LBB0_33:
@@ -854,6 +864,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4
; CHECK-NEXT: v_mov_b32_e32 v1, 12
@@ -944,6 +955,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: ds_write_b32 v0, v56
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 9585c486aeb9e..22b9a9144c68d 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -902,6 +902,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(
; GFX908-NEXT: s_mov_b64 s[0:1], s[52:53]
; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2
; GFX908-NEXT: s_mov_b64 s[2:3], s[54:55]
+; GFX908-NEXT: ; implicit-def: $sgpr15
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: .LBB6_2: ; %bb3
diff --git a/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
new file mode 100644
index 0000000000000..788065fd1391d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+
+# Ensure processing an IMPLICIT_DEF of a physreg updates all uses
+# before removing the IMPLICIT_DEF. -verify-machineinstrs will
+# fail otherwise.
+
+# RUN: llc -mtriple=amdgcn -mcpu=gfx802 -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+
+name: implicit_def_multiple_use
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: implicit_def_multiple_use
+ ; CHECK: $vgpr1_lo16 = COPY undef $vgpr0_hi16
+ ; CHECK-NEXT: $vgpr1_hi16 = COPY undef $vgpr0_lo16
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1_lo16 = COPY $vgpr0_hi16
+ $vgpr1_hi16 = COPY $vgpr0_lo16
+ S_ENDPGM 0
+...
+
+# IMPLICIT_DEF processing will not search across basic blocks for uses, so the IMPLICIT_DEF must not be deleted.
+---
+
+name: implicit_def_cannot_find_all_uses
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: implicit_def_cannot_find_all_uses
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr1_lo16 = COPY undef $vgpr0_hi16
+ ; CHECK-NEXT: $vgpr1_hi16 = COPY undef $vgpr0_lo16
+ ; CHECK-NEXT: $scc = IMPLICIT_DEF
+ ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.0, implicit undef $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ bb.0:
+ $vgpr0 = IMPLICIT_DEF
+ $vgpr1_lo16 = COPY $vgpr0_hi16
+ $vgpr1_hi16 = COPY $vgpr0_lo16
+ $scc = IMPLICIT_DEF
+ S_CBRANCH_SCC1 %bb.0, implicit $scc
+ bb.1:
+...
diff --git a/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll b/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll
index b0c672d3c55de..d7a6cce1fea33 100644
--- a/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll
@@ -30,6 +30,7 @@ define amdgpu_kernel void @main(i1 %arg, ptr %ptr, ptr addrspace(1) %ptr1, ptr a
; GFX950-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX950-NEXT: v_mov_b32_e32 v31, v0
; GFX950-NEXT: s_mov_b32 s32, 0
+; GFX950-NEXT: ; implicit-def: $sgpr15
; GFX950-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX950-NEXT: v_mov_b32_e32 v1, 0
; GFX950-NEXT: v_lshl_add_u64 v[10:11], v[0:1], 3, s[38:39]
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index a2b0f4d56ebea..7d140844278bb 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -362,6 +362,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
+; GFX8-NEXT: ; implicit-def: $sgpr15
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 17, v0
@@ -489,6 +490,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX900-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: s_mov_b32 s32, 0
+; GFX900-NEXT: ; implicit-def: $sgpr15
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX900-NEXT: v_and_b32_e32 v1, 0xff, v0
@@ -599,6 +601,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
+; GFX10-NEXT: ; implicit-def: $sgpr15
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 17, v0
@@ -711,6 +714,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: s_mov_b32 s32, 0
+; GFX90A-NEXT: ; implicit-def: $sgpr15
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX90A-NEXT: v_and_b32_e32 v1, 0xff, v0
@@ -812,6 +816,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) {
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s32, 0
+; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v1, 17, v0
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index d33e94809b326..db04d8c1c2678 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -25,6 +25,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF-NEXT: s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, svm_eval_nodes@rel32@hi+12
; MUBUF-NEXT: s_mov_b32 s32, 0xc0000
+; MUBUF-NEXT: ; implicit-def: $sgpr15
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF-NEXT: v_mov_b32_e32 v0, s0
; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37]
@@ -61,6 +62,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
+; FLATSCR-NEXT: ; implicit-def: $sgpr15
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
; FLATSCR-NEXT: v_mov_b32_e32 v0, s2
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -89,6 +91,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; MUBUF11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; MUBUF11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
; MUBUF11-NEXT: s_movk_i32 s32, 0x6000
+; MUBUF11-NEXT: ; implicit-def: $sgpr15
; MUBUF11-NEXT: s_waitcnt lgkmcnt(0)
; MUBUF11-NEXT: v_mov_b32_e32 v0, s2
; MUBUF11-NEXT: s_swappc_b64 s[30:31], s[0:1]
@@ -116,6 +119,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
; FLATSCR11-NEXT: s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; FLATSCR11-NEXT: s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
; FLATSCR11-NEXT: s_movk_i32 s32, 0x6000
+; FLATSCR11-NEXT: ; implicit-def: $sgpr15
; FLATSCR11-NEXT: s_waitcnt lgkmcnt(0)
; FLATSCR11-NEXT: v_mov_b32_e32 v0, s2
; FLATSCR11-NEXT: s_swappc_b64 s[30:31], s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index 251908b1b0f94..03b653782e5ca 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: global_store_dword v[44:45], v42, off
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[52:53]
-; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[4:5]
; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20
; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17
@@ -68,7 +68,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9]
+; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[8:9]
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
; GLOBALNESS1-NEXT: ; implicit-def: $vgpr57 : SGPR spill to VGPR lane
; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
@@ -127,21 +127,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1
; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS1-NEXT: flat_load_dword v40, v[46:47]
-; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40
; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS1-NEXT: flat_load_dword v56, v[46:47]
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
; GLOBALNESS1-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
+; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[70:71]
@@ -241,13 +242,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT: s_add_u32 s70, s38, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s71, s39, 0
+; GLOBALNESS1-NEXT: s_add_u32 s70, s48, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s71, s49, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
; GLOBALNESS1-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
@@ -257,7 +258,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55]
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
@@ -266,6 +267,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[58:59], off
+; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55]
; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14
@@ -275,7 +277,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_branch .LBB1_14
; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow23
; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0
+; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[48:49], 0x0
; GLOBALNESS1-NEXT: v_readlane_b32 s70, v57, 8
; GLOBALNESS1-NEXT: v_readlane_b32 s8, v57, 10
; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0
@@ -307,36 +309,38 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_31
; GLOBALNESS1-NEXT: ; %bb.30: ; %bb7.i.i
-; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
+; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS1-NEXT: .LBB1_31: ; %Flow
; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_33
; GLOBALNESS1-NEXT: ; %bb.32: ; %bb11.i.i
-; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT: s_add_u32 s8, s48, 40
+; GLOBALNESS1-NEXT: s_addc_u32 s9, s49, 0
; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS1-NEXT: s_mov_b32 s12, s84
; GLOBALNESS1-NEXT: s_mov_b32 s13, s83
; GLOBALNESS1-NEXT: s_mov_b32 s14, s82
; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41
+; GLOBALNESS1-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS1-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock
;
@@ -350,7 +354,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: global_store_dword v[44:45], v42, off
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[52:53]
-; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5]
+; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[4:5]
; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18
; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20
; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17
@@ -379,7 +383,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0
; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9]
+; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[8:9]
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1
; GLOBALNESS0-NEXT: ; implicit-def: $vgpr57 : SGPR spill to VGPR lane
; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0
@@ -438,21 +442,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1
; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2
; GLOBALNESS0-NEXT: flat_load_dword v40, v[46:47]
-; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
+; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40
; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0
; GLOBALNESS0-NEXT: flat_load_dword v56, v[46:47]
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
; GLOBALNESS0-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
+; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[84:85]
@@ -553,13 +558,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15
; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i
; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT: s_add_u32 s84, s38, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s85, s39, 0
+; GLOBALNESS0-NEXT: s_add_u32 s84, s48, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s85, s49, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5]
; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12
; GLOBALNESS0-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
@@ -569,7 +574,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0)
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55]
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
@@ -578,6 +583,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[58:59], off
+; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55]
; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97]
; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14
@@ -617,36 +623,38 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1
; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_31
; GLOBALNESS0-NEXT: ; %bb.30: ; %bb7.i.i
-; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
+; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0
; GLOBALNESS0-NEXT: .LBB1_31: ; %Flow
; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_33
; GLOBALNESS0-NEXT: ; %bb.32: ; %bb11.i.i
-; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT: s_add_u32 s8, s48, 40
+; GLOBALNESS0-NEXT: s_addc_u32 s9, s49, 0
; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17]
; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4
; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12
-; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[38:39]
; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37]
; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35]
; GLOBALNESS0-NEXT: s_mov_b32 s12, s82
; GLOBALNESS0-NEXT: s_mov_b32 s13, s71
; GLOBALNESS0-NEXT: s_mov_b32 s14, s70
; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41
+; GLOBALNESS0-NEXT: ; implicit-def: $sgpr15
; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GLOBALNESS0-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
index 3558c4e1d0c85..485d7d3e75b62 100644
--- a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
+++ b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
@@ -66,6 +66,7 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
; CHECK-NEXT: flat_store_dwordx2 v[44:45], v[58:59]
; CHECK-NEXT: flat_store_dwordx2 v[56:57], v[62:63]
; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55]
; CHECK-NEXT: flat_load_dwordx2 v[0:1], v[46:47] glc
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
index eba9faae5fdb1..8206fd0b4fe13 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -150,6 +150,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
; CHECK-NEXT: extsh r9, r3
; CHECK-NEXT: extsw r6, r28
; CHECK-NEXT: li r5, 0
+; CHECK-NEXT: # implicit-def: $f2
; CHECK-NEXT: li r7, 0
; CHECK-NEXT: std r30, 104(r1)
; CHECK-NEXT: std r29, 96(r1)
@@ -308,6 +309,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
; CHECK-BE-NEXT: extsh r9, r3
; CHECK-BE-NEXT: extsw r6, r28
; CHECK-BE-NEXT: li r5, 0
+; CHECK-BE-NEXT: # implicit-def: $f2
; CHECK-BE-NEXT: li r7, 0
; CHECK-BE-NEXT: std r30, 120(r1)
; CHECK-BE-NEXT: std r29, 112(r1)
diff --git a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
index 3803ac82458bd..81b640d17c620 100644
--- a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
+++ b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
@@ -24,14 +24,22 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 {
; CHECK-NEXT: addi s0, sp, 32
; CHECK-NEXT: .cfi_def_cfa s0, 0
; CHECK-NEXT: .cfi_remember_state
-; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: .Ltmp0: # EH_LABEL
; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: # implicit-def: $x10
+; CHECK-NEXT: # implicit-def: $x11
+; CHECK-NEXT: # implicit-def: $x12
+; CHECK-NEXT: # implicit-def: $x13
+; CHECK-NEXT: # implicit-def: $x14
+; CHECK-NEXT: # implicit-def: $x15
+; CHECK-NEXT: # implicit-def: $x16
+; CHECK-NEXT: # implicit-def: $x17
; CHECK-NEXT: call _Z3fooiiiiiiiiiiPi
; CHECK-NEXT: addi sp, sp, 32
-; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: .Ltmp1: # EH_LABEL
; CHECK-NEXT: # %bb.1: # %try.cont.unreachable
; CHECK-NEXT: .LBB0_2: # %lpad
-; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT: .Ltmp2: # EH_LABEL
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: li a2, 1
; CHECK-NEXT: bne a1, a2, .LBB0_4
diff --git a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
index 17027e2e4e5b4..8e2af5b44b3a9 100644
--- a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
@@ -13,6 +13,7 @@ define i32 @test(i32 %n) nounwind {
; CHECK-NEXT: .LBB0_1: @ %bb
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: bl f
+; CHECK-NEXT: @ implicit-def: $r1
; CHECK-NEXT: bl g
; CHECK-NEXT: subs r4, #1
; CHECK-NEXT: bne .LBB0_1
@@ -58,6 +59,7 @@ define i32 @test_dead_cycle(i32 %n) nounwind {
; CHECK-NEXT: @ %bb.2: @ %bb1
; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: bl f
+; CHECK-NEXT: @ implicit-def: $r1
; CHECK-NEXT: bl g
; CHECK-NEXT: .LBB1_3: @ %bb2
; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
diff --git a/llvm/test/CodeGen/X86/issue76416.ll b/llvm/test/CodeGen/X86/issue76416.ll
index 7193e54a6ad55..14786e5040da4 100644
--- a/llvm/test/CodeGen/X86/issue76416.ll
+++ b/llvm/test/CodeGen/X86/issue76416.ll
@@ -26,6 +26,7 @@ define dso_local void @vga_load_state() #0 {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_4: # %for.cond1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: # implicit-def: $edx
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq vga_load_state_p(%rip), %rax
diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
index bd997d1647766..09d1dd9ccef95 100644
--- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
+++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
@@ -24,6 +24,7 @@ define void @PR24199(i32 %a0) {
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: .LBB0_3: # %if.end
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: # implicit-def: $rdi
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/multi-use-implicit-def.mir b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
new file mode 100644
index 0000000000000..d762e80792505
--- /dev/null
+++ b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
@@ -0,0 +1,15 @@
+# Ensure processing an IMPLICIT_DEF of a physreg updates all uses
+# before removing the IMPLICIT_DEF. -verify-machineinstrs will
+# fail otherwise.
+
+# RUN: llc -x mir < %s -verify-machineinstrs | FileCheck %s
+---
+# CHECK-LABEL: implicit_def:
+name: implicit_def
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ $eax = IMPLICIT_DEF
+ MOV32mr $rip, 1, $noreg, 12, $noreg, $eax
+ RET 0, $eax
+...
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c24823538aa14..69a6cdb7081eb 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -67,10 +67,11 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: cmpq %rax, %rcx
; CHECK-NEXT: jae LBB0_8
; CHECK-NEXT: ## %bb.7: ## %for.body.lr.ph
+; CHECK-NEXT: movq %rdi, %r14
+; CHECK-NEXT: ## implicit-def: $rdi
; CHECK-NEXT: movq %rdx, %rbx
; CHECK-NEXT: movl $512, %edx ## imm = 0x200
; CHECK-NEXT: movl $32, %esi
-; CHECK-NEXT: movq %rdi, %r14
; CHECK-NEXT: callq _memset
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movq %rbx, %rdx
@@ -156,6 +157,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: jne LBB0_31
; CHECK-NEXT: ## %bb.30: ## %lor.rhs500
; CHECK-NEXT: ## in Loop: Header=BB0_28 Depth=2
+; CHECK-NEXT: ## implicit-def: $edi
; CHECK-NEXT: movl $256, %esi ## imm = 0x100
; CHECK-NEXT: callq ___maskrune
; CHECK-NEXT: movb $1, %sil
@@ -258,6 +260,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
; CHECK-NEXT: LBB0_47: ## %if.then1477
+; CHECK-NEXT: ## implicit-def: $edi
+; CHECK-NEXT: ## implicit-def: $rsi
; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: callq _write
; CHECK-NEXT: subq %rbx, %r14
diff --git a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
index ea1ca51908134..9663ab995d64f 100644
--- a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
+++ b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
@@ -14,7 +14,7 @@
; The new code choses %ebp as the split candidate as it has lower spill cost.
; Make sure the split behaves as expected
-; CHECK: RS_Split Cascade 1
+; CHECK: RS_Split Cascade 0
; CHECK-NOT: $eax static =
; CHECK: $eax no positive bundles
; CHECK-NEXT: $ecx no positive bundles
diff --git a/llvm/test/CodeGen/X86/statepoint-two-results.ll b/llvm/test/CodeGen/X86/statepoint-two-results.ll
index 4993c292dc553..82467841910b2 100644
--- a/llvm/test/CodeGen/X86/statepoint-two-results.ll
+++ b/llvm/test/CodeGen/X86/statepoint-two-results.ll
@@ -8,6 +8,7 @@ define void @quux() gc "statepoint-example" {
; CHECK: # %bb.0: # %bb1
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: # implicit-def: $rdi
; CHECK-NEXT: movl $4, %esi
; CHECK-NEXT: callq wombat@PLT
; CHECK-NEXT: .Ltmp0:
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
index ef542e5b1427a..f08b388c3dc56 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
@@ -183,6 +183,11 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr
; CHECK-NEXT: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
; CHECK-NEXT: EH_LABEL <mcsymbol >
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+ ; CHECK-NEXT: dead $edi = IMPLICIT_DEF
+ ; CHECK-NEXT: dead $rsi = IMPLICIT_DEF
+ ; CHECK-NEXT: dead $edx = IMPLICIT_DEF
+ ; CHECK-NEXT: dead $ecx = IMPLICIT_DEF
+ ; CHECK-NEXT: dead $r8d = IMPLICIT_DEF
; CHECK-NEXT: STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s64) on %stack.0)
; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: EH_LABEL <mcsymbol >
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 72e4fe410e269..f3e6d8b5489c3 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -109,6 +109,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB1_12
; CHECK-NEXT: # %bb.2: # %if.end50
+; CHECK-NEXT: # implicit-def: $rsi
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movq %r15, %rdx
; CHECK-NEXT: callq memcpy@PLT
@@ -170,6 +171,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3
; CHECK-NEXT: .LBB1_11: # %if.then99.i
; CHECK-NEXT: movq .str.6@GOTPCREL(%rip), %rdi
; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: # implicit-def: $esi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: callq cli_dbgmsg@PLT
; CHECK-NEXT: .LBB1_12: # %cleanup
>From 4b74beaf1aa671a777ec3bb39b72d151b8c035bc Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Mon, 23 Mar 2026 16:33:07 -0500
Subject: [PATCH 02/11] Apply clang-format
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index a05d520678cc0..034a06e75de94 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -107,7 +107,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
bool ImplicitDefIsDead = false;
for (++UserMI; UserMI != UserE; ++UserMI) {
- bool DefinesReg=false;
+ bool DefinesReg = false;
for (MachineOperand &MO : UserMI->operands()) {
if (!MO.isReg())
continue;
@@ -115,10 +115,10 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
continue;
// UserMI uses or redefines Reg. Set <undef> flags on all uses.
- if (!ImplicitDefIsDead && MO.isUse() )
+ if (!ImplicitDefIsDead && MO.isUse())
MO.setIsUndef();
if (MO.isDef())
- DefinesReg = true;
+ DefinesReg = true;
}
if (DefinesReg) {
ImplicitDefIsDead = true;
@@ -126,8 +126,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
}
}
- // If we have added an undef flag to all uses (i.e. we have found a redefining MI or
- // there are no successors), we can erase the IMPLICIT_DEF.
+ // If we have added an undef flag to all uses (i.e. we have found a redefining
+ // MI or there are no successors), we can erase the IMPLICIT_DEF.
if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
MI->eraseFromParent();
>From 8104f6e297ca6fdfcbf249ab3d98aca696624fc3 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 26 Mar 2026 09:58:00 -0500
Subject: [PATCH 03/11] Cleanup code, update comments, and ensure IMPLICIT_DEF
is fully redefined.
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 034a06e75de94..5c5c0c0dfaf25 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -102,22 +102,23 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
}
// This is a physreg implicit-def.
- // Look for the first instruction to use or define an alias.
- MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
- MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+ // Try to add undef flag to all uses. If all uses are updated remove
+ // implicit-def.
+ MachineBasicBlock::instr_iterator SearchMI = MI->getIterator();
+ MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end();
bool ImplicitDefIsDead = false;
- for (++UserMI; UserMI != UserE; ++UserMI) {
+ for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
bool DefinesReg = false;
- for (MachineOperand &MO : UserMI->operands()) {
+ for (MachineOperand &MO : SearchMI->operands()) {
if (!MO.isReg())
continue;
- Register UserReg = MO.getReg();
- if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
+ Register SearchReg = MO.getReg();
+ if (!SearchReg.isPhysical() || !TRI->regsOverlap(Reg, SearchReg))
continue;
- // UserMI uses or redefines Reg. Set <undef> flags on all uses.
- if (!ImplicitDefIsDead && MO.isUse())
+ // SearchMI uses or redefines Reg. Set <undef> flags on all uses.
+ if (MO.isUse())
MO.setIsUndef();
- if (MO.isDef())
+ if (MO.isDef() && TRI->isSubRegisterEq(SearchReg, Reg))
DefinesReg = true;
}
if (DefinesReg) {
@@ -129,7 +130,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// If we have added an undef flag to all uses (i.e. we have found a redefining
// MI or there are no successors), we can erase the IMPLICIT_DEF.
if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
- LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
+ if (ImplicitDefIsDead)
+ LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
MI->eraseFromParent();
return;
}
>From 117307a4e48202ef8cc7fd278905581181a0f1a6 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 26 Mar 2026 10:40:43 -0500
Subject: [PATCH 04/11] Add search limit
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 5c5c0c0dfaf25..847c249dda65b 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -107,8 +107,15 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
MachineBasicBlock::instr_iterator SearchMI = MI->getIterator();
MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end();
bool ImplicitDefIsDead = false;
+ bool SearchedWholeBlock = true;
+ constexpr unsigned SearchLimit = 60000;
+ unsigned Count = 0;
for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
bool DefinesReg = false;
+ if (++Count > SearchLimit) {
+ SearchedWholeBlock = false;
+ break;
+ }
for (MachineOperand &MO : SearchMI->operands()) {
if (!MO.isReg())
continue;
@@ -129,7 +136,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// If we have added an undef flag to all uses (i.e. we have found a redefining
// MI or there are no successors), we can erase the IMPLICIT_DEF.
- if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
+ if (ImplicitDefIsDead ||
+ (SearchedWholeBlock && MI->getParent()->succ_empty())) {
if (ImplicitDefIsDead)
LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
MI->eraseFromParent();
>From 170a932913d01050349ce759066cb58cc0d9a5d4 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 26 Mar 2026 13:08:13 -0500
Subject: [PATCH 05/11] Autogen results
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/test/CodeGen/X86/multi-use-implicit-def.mir | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/X86/multi-use-implicit-def.mir b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
index d762e80792505..051157dc015bc 100644
--- a/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
+++ b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
@@ -1,14 +1,19 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# Ensure processing an IMPLICIT_DEF of a physreg updates all uses
# before removing the IMPLICIT_DEF. -verify-machineinstrs will
# fail otherwise.
-# RUN: llc -x mir < %s -verify-machineinstrs | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -x mir -stop-after processimpdefs < %s -verify-machineinstrs | FileCheck %s
---
-# CHECK-LABEL: implicit_def:
name: implicit_def
tracksRegLiveness: true
body: |
bb.0.entry:
+ ; CHECK-LABEL: name: implicit_def
+ ; CHECK: MOV32mr $rip, 1, $noreg, 12, $noreg, undef $eax
+ ; CHECK-NEXT: RET 0, undef $eax
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1.entry:
$eax = IMPLICIT_DEF
MOV32mr $rip, 1, $noreg, 12, $noreg, $eax
RET 0, $eax
>From b4fb9e50064ea649c77a66c60150adfaae3dc4db Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 27 Mar 2026 09:32:24 -0500
Subject: [PATCH 06/11] Simplify logic based on feedback
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 847c249dda65b..3c431b8c10bbe 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -111,7 +111,6 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
constexpr unsigned SearchLimit = 60000;
unsigned Count = 0;
for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
- bool DefinesReg = false;
if (++Count > SearchLimit) {
SearchedWholeBlock = false;
break;
@@ -126,10 +125,10 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
if (MO.isUse())
MO.setIsUndef();
if (MO.isDef() && TRI->isSubRegisterEq(SearchReg, Reg))
- DefinesReg = true;
+ ImplicitDefIsDead = true;
}
- if (DefinesReg) {
- ImplicitDefIsDead = true;
+ if (ImplicitDefIsDead) {
+ LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
break;
}
}
@@ -138,8 +137,6 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
// MI or there are no successors), we can erase the IMPLICIT_DEF.
if (ImplicitDefIsDead ||
(SearchedWholeBlock && MI->getParent()->succ_empty())) {
- if (ImplicitDefIsDead)
- LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
MI->eraseFromParent();
return;
}
>From 8ba7ecda66bc3fc5d08c68e8f99545ff7e362bd9 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 27 Mar 2026 10:24:43 -0500
Subject: [PATCH 07/11] Reduce search limit
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 2 +-
...ine-sink-temporal-divergence-swdev407790.ll | 2 ++
llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll | 2 ++
.../AMDGPU/promote-constOffset-to-imm.ll | 18 ++++++++++++++++++
4 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 3c431b8c10bbe..da1c86f3f974c 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -108,7 +108,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end();
bool ImplicitDefIsDead = false;
bool SearchedWholeBlock = true;
- constexpr unsigned SearchLimit = 60000;
+ constexpr unsigned SearchLimit = 35;
unsigned Count = 0;
for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
if (++Count > SearchLimit) {
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 951bf1e6eeac8..15f2dc086bdd1 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -77,6 +77,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mov_b32_e32 v41, v0
; CHECK-NEXT: v_mov_b32_e32 v31, v40
@@ -831,6 +832,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
; CHECK-NEXT: s_mov_b32 s12, s51
; CHECK-NEXT: s_mov_b32 s13, s50
; CHECK-NEXT: s_mov_b32 s14, s33
+; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mul_lo_u32 v44, v0, 14
; CHECK-NEXT: v_mov_b32_e32 v31, v40
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 22b9a9144c68d..263298d1cd1c8 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -402,6 +402,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr add
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX908-NEXT: v_mov_b32_e32 v32, 0
+; GFX908-NEXT: ; implicit-def: $agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111_agpr112_agpr113_agpr114_agpr115_agpr116_agpr117_agpr118_agpr119_agpr120_agpr121_agpr122_agpr123_agpr124_agpr125_agpr126_agpr127_agpr128_agpr129_agpr130_agpr131
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use a[100:131]
; GFX908-NEXT: ;;#ASMEND
@@ -638,6 +639,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(ptr addrspace(1) %arg)
; GFX908-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX908-NEXT: s_mov_b32 s32, 0
; GFX908-NEXT: v_mov_b32_e32 v40, 0
+; GFX908-NEXT: ; implicit-def: $sgpr15
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: global_load_dwordx4 v[28:31], v40, s[34:35] offset:112
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index 7d140844278bb..6084381da84f9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -27,6 +27,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
+; GFX8-NEXT: ; implicit-def: $sgpr15
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -108,6 +109,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -178,6 +180,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
+; GFX10-NEXT: ; implicit-def: $sgpr15
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -241,6 +244,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) {
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s32, 0
+; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -1041,6 +1045,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
+; GFX8-NEXT: ; implicit-def: $sgpr15
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -1127,6 +1132,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -1184,6 +1190,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
+; GFX10-NEXT: ; implicit-def: $sgpr15
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -1241,6 +1248,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s32, 0
+; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -1358,6 +1366,7 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) {
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
+; GFX8-NEXT: ; implicit-def: $sgpr15
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -1583,6 +1592,7 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) {
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
+; GFX8-NEXT: ; implicit-def: $sgpr15
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -1791,6 +1801,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
+; GFX8-NEXT: ; implicit-def: $sgpr15
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 7, v0
@@ -1854,6 +1865,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 7, v0
@@ -1913,6 +1925,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GFX10-NEXT: s_mov_b32 s32, 0
+; GFX10-NEXT: ; implicit-def: $sgpr15
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0
@@ -1963,6 +1976,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b128 s[36:39], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s32, 0
+; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 7, v0
@@ -2063,6 +2077,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: s_mov_b32 s32, 0
+; GFX8-NEXT: ; implicit-def: $sgpr15
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -2144,6 +2159,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_mov_b32 s32, 0
+; GFX9-NEXT: ; implicit-def: $sgpr15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -2213,6 +2229,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37]
; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39]
; GFX10-NEXT: s_mov_b32 s32, 0
+; GFX10-NEXT: ; implicit-def: $sgpr15
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 7, v0
@@ -2280,6 +2297,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[34:35], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s32, 0
+; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 7, v0
>From d709f75bca25b5b0926b06f68f0c83f09f9288b3 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 27 Mar 2026 13:35:03 -0500
Subject: [PATCH 08/11] Cannot add undef to uses that are not fully undef
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 33 +++++++++------
.../CodeGen/AMDGPU/subreg-implicit-def.mir | 42 +++++++++++++++++++
2 files changed, 63 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index da1c86f3f974c..161fc558d0ebe 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -102,6 +102,10 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
}
// This is a physreg implicit-def.
+ // Trim any extra operands.
+ for (unsigned i = MI->getNumOperands() - 1; i; --i)
+ MI->removeOperand(i);
+
// Try to add undef flag to all uses. If all uses are updated remove
// implicit-def.
MachineBasicBlock::instr_iterator SearchMI = MI->getIterator();
@@ -122,13 +126,24 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
if (!SearchReg.isPhysical() || !TRI->regsOverlap(Reg, SearchReg))
continue;
// SearchMI uses or redefines Reg. Set <undef> flags on all uses.
- if (MO.isUse())
- MO.setIsUndef();
- if (MO.isDef() && TRI->isSubRegisterEq(SearchReg, Reg))
- ImplicitDefIsDead = true;
+ if (MO.isUse()) {
+ if (TRI->isSubRegisterEq(Reg, SearchReg))
+ MO.setIsUndef();
+ else
+ // Use is larger than Reg. It is not safe to add undef to this use.
+ return;
+ }
+ if (MO.isDef()) {
+ if (TRI->isSubRegisterEq(SearchReg, Reg))
+ ImplicitDefIsDead = true;
+ else
+ // Reg is larger than definition. It is not safe to add undef to any
+ // subsequent uses of Reg.
+ return;
+ }
}
if (ImplicitDefIsDead) {
- LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
+ LLVM_DEBUG(dbgs() << "Physreg redefine: " << *SearchMI);
break;
}
}
@@ -138,14 +153,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
if (ImplicitDefIsDead ||
(SearchedWholeBlock && MI->getParent()->succ_empty())) {
MI->eraseFromParent();
- return;
+ LLVM_DEBUG(dbgs() << "Deleting implicit-def: " << *MI);
}
-
- // Using instr wasn't found, it could be in another block.
- // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
- for (unsigned i = MI->getNumOperands() - 1; i; --i)
- MI->removeOperand(i);
- LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);
}
bool ProcessImplicitDefsLegacy::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir
new file mode 100644
index 0000000000000..9eb52fe4f82a0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir
@@ -0,0 +1,42 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+
+# Ensure processing an IMPLICIT_DEF of a physreg handles subreg definitions
+# and super-reg uses correctly.
+
+
+# RUN: llc -mtriple=amdgcn -mcpu=gfx802 -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s
+
+# Must not add undef to use of implicit-def because def is partially redefined.
+---
+name: impdef_subreg_def
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: impdef_subreg_def
+ ; CHECK: $sgpr0_sgpr1 = IMPLICIT_DEF
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+ ; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: S_ENDPGM 0
+ $sgpr0_sgpr1 = IMPLICIT_DEF
+ $sgpr0 = S_MOV_B32 0
+ $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+ S_ENDPGM 0
+...
+
+# Must not add undef to use of implicit-def because use is larger than implicit definition.
+---
+name: impdef_superreg_use
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: impdef_superreg_use
+ ; CHECK: $sgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $sgpr1 = S_MOV_B32 0
+ ; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: S_ENDPGM 0
+ $sgpr0 = IMPLICIT_DEF
+ $sgpr1 = S_MOV_B32 0
+ $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+ S_ENDPGM 0
+...
+
>From a1fafa0aec759756ab9e5623769dfee4c0fdb416 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Tue, 31 Mar 2026 10:32:10 -0500
Subject: [PATCH 09/11] Skip debug instructions
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 161fc558d0ebe..15fbebd3d1353 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -115,6 +115,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
constexpr unsigned SearchLimit = 35;
unsigned Count = 0;
for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
+ if (SearchMI->isDebugInstr())
+ continue;
if (++Count > SearchLimit) {
SearchedWholeBlock = false;
break;
>From 96b5b06266aea7a23af745dd850660e929bad7d5 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Tue, 31 Mar 2026 11:41:13 -0500
Subject: [PATCH 10/11] Add braces
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 15fbebd3d1353..5fc765d90f6e4 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -129,19 +129,21 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
continue;
// SearchMI uses or redefines Reg. Set <undef> flags on all uses.
if (MO.isUse()) {
- if (TRI->isSubRegisterEq(Reg, SearchReg))
+ if (TRI->isSubRegisterEq(Reg, SearchReg)) {
MO.setIsUndef();
- else
+ } else {
// Use is larger than Reg. It is not safe to add undef to this use.
return;
+ }
}
if (MO.isDef()) {
- if (TRI->isSubRegisterEq(SearchReg, Reg))
+ if (TRI->isSubRegisterEq(SearchReg, Reg)) {
ImplicitDefIsDead = true;
- else
+ } else {
// Reg is larger than definition. It is not safe to add undef to any
// subsequent uses of Reg.
return;
+ }
}
}
if (ImplicitDefIsDead) {
>From 8993c705a0436f0f0518b4fb5e6cacb5fe57ef8c Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Wed, 1 Apr 2026 07:53:45 -0500
Subject: [PATCH 11/11] Update tests
Signed-off-by: John Lu <John.Lu at amd.com>
---
.../CodeGen/AMDGPU/insert-delay-alu-bug.ll | 29 ++++++++++---------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index 880cfbf12b931..7d8a0b70d5f55 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -57,7 +57,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_mov_b64 s[18:19], s[4:5]
; GFX11-NEXT: v_mov_b32_e32 v31, v0
-; GFX11-NEXT: s_load_b32 s24, s[18:19], 0x24
+; GFX11-NEXT: s_load_b32 s25, s[18:19], 0x24
+; GFX11-NEXT: s_mov_b32 s17, s15
; GFX11-NEXT: s_mov_b32 s12, s13
; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
@@ -65,10 +66,10 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1]
; GFX11-NEXT: s_mov_b32 s20, 0
; GFX11-NEXT: s_mov_b32 s0, -1
-; GFX11-NEXT: s_mov_b32 s17, exec_lo
+; GFX11-NEXT: s_mov_b32 s24, exec_lo
; GFX11-NEXT: s_mov_b32 s32, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_mul_lo_u32 v0, s24, v0
+; GFX11-NEXT: v_mul_lo_u32 v0, s25, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB2_13
@@ -76,7 +77,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_load_b128 s[20:23], s[18:19], 0x2c
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_bitcmp1_b32 s21, 0
-; GFX11-NEXT: s_cselect_b32 s25, -1, 0
+; GFX11-NEXT: s_cselect_b32 s26, -1, 0
; GFX11-NEXT: s_bitcmp0_b32 s21, 0
; GFX11-NEXT: s_mov_b32 s21, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB2_3
@@ -89,11 +90,11 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_mov_b32 s13, s14
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT: s_mov_b32 s26, s14
-; GFX11-NEXT: s_mov_b32 s14, s15
+; GFX11-NEXT: s_mov_b32 s15, s14
+; GFX11-NEXT: s_mov_b32 s14, s17
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: s_mov_b32 s14, s26
+; GFX11-NEXT: s_mov_b32 s14, s15
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_cbranch_execz .LBB2_4
; GFX11-NEXT: s_branch .LBB2_12
@@ -127,11 +128,11 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_mul_i32 s0, s0, s22
; GFX11-NEXT: s_mul_i32 s0, s0, s20
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_or_b32 s0, s24, s0
+; GFX11-NEXT: s_or_b32 s0, s25, s0
; GFX11-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
; GFX11-NEXT: s_mov_b32 s0, s1
; GFX11-NEXT: global_load_u16 v1, v0, s[22:23]
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s25
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s26
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
@@ -152,7 +153,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_and_b32 s1, s1, 1
; GFX11-NEXT: s_and_b32 s9, 0xffff, s0
; GFX11-NEXT: s_cselect_b32 s9, -1, 0
-; GFX11-NEXT: s_and_b32 s16, s8, exec_lo
+; GFX11-NEXT: s_and_b32 s15, s8, exec_lo
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s9
; GFX11-NEXT: v_readfirstlane_b32 s9, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
@@ -182,8 +183,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_and_b32 s20, s2, exec_lo
; GFX11-NEXT: s_or_not1_b32 s0, s21, exec_lo
; GFX11-NEXT: .LBB2_13: ; %Flow9
-; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s17
-; GFX11-NEXT: s_and_saveexec_b32 s17, s0
+; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s24
+; GFX11-NEXT: s_and_saveexec_b32 s21, s0
; GFX11-NEXT: s_cbranch_execz .LBB2_15
; GFX11-NEXT: ; %bb.14: ; %bb43
; GFX11-NEXT: s_add_u32 s8, s18, 0x58
@@ -193,13 +194,13 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
; GFX11-NEXT: s_mov_b32 s13, s14
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT: s_mov_b32 s14, s18
+; GFX11-NEXT: s_mov_b32 s14, s17
; GFX11-NEXT: ; implicit-def: $sgpr15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_or_b32 s20, s20, exec_lo
; GFX11-NEXT: .LBB2_15: ; %Flow14
-; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s17
+; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s21
; GFX11-NEXT: s_and_saveexec_b32 s0, s20
; GFX11-NEXT: ; %bb.16: ; %UnifiedUnreachableBlock
; GFX11-NEXT: ; divergent unreachable
More information about the llvm-commits
mailing list