[llvm] [CodeGen] Do not remove IMPLICIT_DEF unless all uses have undef flag added (PR #188133)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 05:54:16 PDT 2026


https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/188133

>From af65e1932d4b5e2e8cdde6b92787e949dc68d7ce Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Mon, 23 Mar 2026 16:24:34 -0500
Subject: [PATCH 01/11] Do not remove IMPLICIT_DEF unless all uses have undef
 flag added

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp      |  17 +-
 ...der-no-live-segment-at-def-implicit-def.ll |  26 +-
 .../branch-folding-implicit-def-subreg.ll     | 112 ++++----
 llvm/test/CodeGen/AMDGPU/call-constexpr.ll    |   2 +
 llvm/test/CodeGen/AMDGPU/call-skip.ll         |  14 +-
 .../AMDGPU/cross-block-use-is-not-abi-copy.ll |  14 +-
 .../AMDGPU/global_atomics_scan_fadd.ll        | 260 ++++++++++++++----
 .../AMDGPU/global_atomics_scan_fmax.ll        | 156 ++++++++---
 .../AMDGPU/global_atomics_scan_fmin.ll        | 156 ++++++++---
 .../AMDGPU/global_atomics_scan_fsub.ll        | 260 ++++++++++++++----
 .../CodeGen/AMDGPU/insert-delay-alu-bug.ll    |   6 +-
 ...ne-sink-temporal-divergence-swdev407790.ll |  12 +
 llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll    |   1 +
 .../CodeGen/AMDGPU/multi-use-implicit-def.mir |  49 ++++
 llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll   |   1 +
 .../AMDGPU/promote-constOffset-to-imm.ll      |   5 +
 ...tack-pointer-offset-relative-frameindex.ll |   4 +
 .../AMDGPU/tuple-allocation-failure.ll        |  70 ++---
 .../AMDGPU/undef-handling-crash-in-ra.ll      |   1 +
 llvm/test/CodeGen/PowerPC/p10-spill-crun.ll   |   2 +
 llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll |  14 +-
 .../CodeGen/Thumb2/2010-02-11-phi-cycle.ll    |   2 +
 llvm/test/CodeGen/X86/issue76416.ll           |   1 +
 .../X86/machine-trace-metrics-crash.ll        |   1 +
 .../CodeGen/X86/multi-use-implicit-def.mir    |  15 +
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |   6 +-
 .../X86/regalloc-advanced-split-cost.ll       |   2 +-
 .../CodeGen/X86/statepoint-two-results.ll     |   1 +
 .../CodeGen/X86/statepoint-vreg-invoke.ll     |   5 +
 .../X86/tail-dup-merge-loop-headers.ll        |   2 +
 30 files changed, 903 insertions(+), 314 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
 create mode 100644 llvm/test/CodeGen/X86/multi-use-implicit-def.mir

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 6b58b6574f5bd..a05d520678cc0 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -105,8 +105,9 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   // Look for the first instruction to use or define an alias.
   MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
   MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
-  bool Found = false;
+  bool ImplicitDefIsDead = false;
   for (++UserMI; UserMI != UserE; ++UserMI) {
+    bool DefinesReg=false;
     for (MachineOperand &MO : UserMI->operands()) {
       if (!MO.isReg())
         continue;
@@ -114,16 +115,20 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
       if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
         continue;
       // UserMI uses or redefines Reg. Set <undef> flags on all uses.
-      Found = true;
-      if (MO.isUse())
+      if (!ImplicitDefIsDead && MO.isUse() )
         MO.setIsUndef();
+      if (MO.isDef())
+	DefinesReg = true;
     }
-    if (Found)
+    if (DefinesReg) {
+      ImplicitDefIsDead = true;
       break;
+    }
   }
 
-  // If we found the using MI, we can erase the IMPLICIT_DEF.
-  if (Found) {
+  // If we have added an undef flag to all uses (i.e. we have found a redefining MI or
+  // there are no successors), we can erase the IMPLICIT_DEF.
+  if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
     LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
     MI->eraseFromParent();
     return;
diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index 3bab9d0e80546..554d7eb2cfbdb 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -21,12 +21,12 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB0_4
 ; CHECK-NEXT:  ; %bb.2: ; %if.else251.i.i
 ; CHECK-NEXT:    s_cmp_lg_u32 s55, 0
-; CHECK-NEXT:    s_mov_b32 s17, 0
-; CHECK-NEXT:    s_cselect_b32 s12, -1, 0
-; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s12
+; CHECK-NEXT:    s_mov_b32 s13, s15
+; CHECK-NEXT:    s_cselect_b32 s17, -1, 0
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s17
 ; CHECK-NEXT:    s_cbranch_vccz .LBB0_5
 ; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    s_mov_b32 s18, 0
+; CHECK-NEXT:    s_mov_b32 s15, 0
 ; CHECK-NEXT:    s_branch .LBB0_6
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    s_mov_b32 s14, s12
@@ -36,36 +36,36 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
 ; CHECK-NEXT:    s_mov_b64 s[48:49], s[12:13]
 ; CHECK-NEXT:    s_branch .LBB0_8
 ; CHECK-NEXT:  .LBB0_5: ; %if.then263.i.i
-; CHECK-NEXT:    v_cmp_lt_f32_e64 s12, s53, 0
-; CHECK-NEXT:    s_mov_b32 s18, 1.0
-; CHECK-NEXT:    s_mov_b32 s17, 0x7fc00000
+; CHECK-NEXT:    v_cmp_lt_f32_e64 s17, s53, 0
+; CHECK-NEXT:    s_mov_b32 s15, 1.0
+; CHECK-NEXT:    s_mov_b32 s12, 0x7fc00000
 ; CHECK-NEXT:  .LBB0_6: ; %Flow
 ; CHECK-NEXT:    s_mov_b32 s48, 1.0
-; CHECK-NEXT:    s_andn2_b32 vcc_lo, exec_lo, s12
+; CHECK-NEXT:    s_andn2_b32 vcc_lo, exec_lo, s17
 ; CHECK-NEXT:    s_mov_b32 s49, s48
 ; CHECK-NEXT:    s_mov_b32 s50, s48
 ; CHECK-NEXT:    s_mov_b32 s51, s48
 ; CHECK-NEXT:    s_cbranch_vccnz .LBB0_8
 ; CHECK-NEXT:  ; %bb.7: ; %if.end273.i.i
-; CHECK-NEXT:    s_add_u32 s12, s8, 40
-; CHECK-NEXT:    s_addc_u32 s13, s9, 0
+; CHECK-NEXT:    s_add_u32 s18, s8, 40
+; CHECK-NEXT:    s_addc_u32 s19, s9, 0
 ; CHECK-NEXT:    s_getpc_b64 s[20:21]
 ; CHECK-NEXT:    s_add_u32 s20, s20, _Z3dotDv3_fS_ at gotpcrel32@lo+4
 ; CHECK-NEXT:    s_addc_u32 s21, s21, _Z3dotDv3_fS_ at gotpcrel32@hi+12
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[20:21], 0x0
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 10, v1
-; CHECK-NEXT:    v_add_f32_e64 v1, s17, s18
+; CHECK-NEXT:    v_add_f32_e64 v1, s12, s15
 ; CHECK-NEXT:    s_mov_b64 s[34:35], s[8:9]
-; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
+; CHECK-NEXT:    s_mov_b64 s[8:9], s[18:19]
 ; CHECK-NEXT:    s_mov_b32 s12, s14
 ; CHECK-NEXT:    v_or3_b32 v31, v0, v3, v2
 ; CHECK-NEXT:    v_mov_b32_e32 v0, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0
-; CHECK-NEXT:    s_mov_b32 s13, s15
 ; CHECK-NEXT:    s_mov_b32 s14, s16
 ; CHECK-NEXT:    s_mov_b32 s48, 0
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
 ; CHECK-NEXT:    s_mov_b64 s[8:9], s[34:35]
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index e0b83eeaa0faa..bc88acf521bd1 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -12,11 +12,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   $flat_scr_hi = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc
   ; GFX90A-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, $sgpr17, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   renamable $sgpr17 = COPY $sgpr15
   ; GFX90A-NEXT:   renamable $vgpr31 = COPY $vgpr0, implicit $exec
   ; GFX90A-NEXT:   early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg3.kernarg.offset.align.down, align 8, addrspace 4)
   ; GFX90A-NEXT:   renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
   ; GFX90A-NEXT:   renamable $sgpr56_sgpr57 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
-  ; GFX90A-NEXT:   renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4)
+  ; GFX90A-NEXT:   renamable $sgpr15 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg3.kernarg.offset.align.down + 16, align 8, addrspace 4)
   ; GFX90A-NEXT:   S_BITCMP1_B32 renamable $sgpr20, 0, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
   ; GFX90A-NEXT:   renamable $sgpr18_sgpr19 = S_MOV_B64 -1
@@ -40,7 +41,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   successors: %bb.3(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr40_sgpr41, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr4, $vgpr5
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr17 = IMPLICIT_DEF implicit-def $vgpr16
   ; GFX90A-NEXT:   renamable $vgpr3 = IMPLICIT_DEF implicit-def $vgpr2
@@ -51,7 +52,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.3.Flow17:
   ; GFX90A-NEXT:   successors: %bb.4(0x40000000), %bb.57(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr6 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
   ; GFX90A-NEXT:   renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def dead $scc
@@ -59,7 +60,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.4.bb15:
   ; GFX90A-NEXT:   successors: %bb.35(0x40000000), %bb.5(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
@@ -74,7 +75,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.5:
   ; GFX90A-NEXT:   successors: %bb.6(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr34_sgpr35 = S_MOV_B64 -1
   ; GFX90A-NEXT:   renamable $sgpr46_sgpr47 = S_MOV_B64 0
@@ -109,7 +110,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.6.Flow20:
   ; GFX90A-NEXT:   successors: %bb.7(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr26 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr28 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -122,7 +123,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.7.Flow19:
   ; GFX90A-NEXT:   successors: %bb.62(0x40000000), %bb.8(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr68_sgpr69 = S_MOV_B64 0
   ; GFX90A-NEXT:   $sgpr18_sgpr19 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -130,7 +131,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.8.Flow32:
   ; GFX90A-NEXT:   successors: %bb.9(0x40000000), %bb.10(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc
   ; GFX90A-NEXT:   $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -139,7 +140,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.9.bb89:
   ; GFX90A-NEXT:   successors: %bb.10(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET renamable $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -147,7 +148,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.10.Flow33:
   ; GFX90A-NEXT:   successors: %bb.11(0x40000000), %bb.12(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
   ; GFX90A-NEXT:   $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -156,7 +157,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.11.bb84:
   ; GFX90A-NEXT:   successors: %bb.12(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET renamable $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -164,7 +165,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.12.Flow34:
   ; GFX90A-NEXT:   successors: %bb.13(0x40000000), %bb.14(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
   ; GFX90A-NEXT:   $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -173,7 +174,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.13.bb79:
   ; GFX90A-NEXT:   successors: %bb.14(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5)
   ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5)
@@ -181,7 +182,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.14.Flow35:
   ; GFX90A-NEXT:   successors: %bb.15(0x40000000), %bb.16(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
   ; GFX90A-NEXT:   $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -190,14 +191,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.15.bb72:
   ; GFX90A-NEXT:   successors: %bb.16(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr8 = S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc
   ; GFX90A-NEXT:   renamable $sgpr12_sgpr13 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @f2, target-flags(amdgpu-gotprel32-hi) @f2, implicit-def dead $scc
   ; GFX90A-NEXT:   renamable $sgpr18_sgpr19 = S_LOAD_DWORDX2_IMM killed renamable $sgpr12_sgpr13, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+  ; GFX90A-NEXT:   dead $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr12 = COPY killed renamable $sgpr14
-  ; GFX90A-NEXT:   $sgpr13 = COPY killed renamable $sgpr15
+  ; GFX90A-NEXT:   $sgpr13 = COPY killed renamable $sgpr17
   ; GFX90A-NEXT:   $sgpr14 = COPY killed renamable $sgpr16
   ; GFX90A-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr18_sgpr19, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
   ; GFX90A-NEXT:   renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc
@@ -358,7 +360,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.35.bb20:
   ; GFX90A-NEXT:   successors: %bb.37(0x40000000), %bb.36(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23)
   ; GFX90A-NEXT:   renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
@@ -398,14 +400,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.36.Flow21:
   ; GFX90A-NEXT:   successors: %bb.6(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc
   ; GFX90A-NEXT:   S_BRANCH %bb.6
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.37.bb27:
   ; GFX90A-NEXT:   successors: %bb.39(0x40000000), %bb.38(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30)
   ; GFX90A-NEXT:   renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
@@ -437,7 +439,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.38.Flow22:
   ; GFX90A-NEXT:   successors: %bb.36(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -458,7 +460,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.39.bb34:
   ; GFX90A-NEXT:   successors: %bb.41(0x40000000), %bb.40(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr66_sgpr67
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37)
   ; GFX90A-NEXT:   renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
@@ -489,7 +491,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.40.Flow23:
   ; GFX90A-NEXT:   successors: %bb.38(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -509,7 +511,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.41.bb41:
   ; GFX90A-NEXT:   successors: %bb.46(0x40000000), %bb.42(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr54_sgpr55, $sgpr66_sgpr67, $sgpr68_sgpr69
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr1, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, $vcc, 0, implicit $exec
@@ -539,7 +541,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.42.Flow24:
   ; GFX90A-NEXT:   successors: %bb.40(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -558,7 +560,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.43.bb55:
   ; GFX90A-NEXT:   successors: %bb.48(0x40000000), %bb.44(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   S_BITCMP1_B32 renamable $sgpr20, 16, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc
@@ -570,7 +572,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.44:
   ; GFX90A-NEXT:   successors: %bb.45(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr57, $vgpr62, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr40, $vgpr61, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr56, $vgpr47, $vgpr2, $vgpr4, $vgpr5, $vgpr6, $vgpr46, $vgpr45, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr58, $vgpr60, $vgpr63, $vgpr59
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
   ; GFX90A-NEXT:   renamable $vgpr12_vgpr13 = IMPLICIT_DEF
@@ -590,7 +592,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.45.Flow26:
   ; GFX90A-NEXT:   successors: %bb.47(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc
   ; GFX90A-NEXT:   renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc
@@ -606,7 +608,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.46.bb48:
   ; GFX90A-NEXT:   successors: %bb.43(0x40000000), %bb.47(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr66_sgpr67, $sgpr58_sgpr59, $sgpr68_sgpr69, $sgpr64_sgpr65, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr60_sgpr61
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec
   ; GFX90A-NEXT:   renamable $sgpr18_sgpr19 = COPY $vcc
@@ -637,7 +639,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.47.Flow25:
   ; GFX90A-NEXT:   successors: %bb.42(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr44_sgpr45, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc
@@ -655,21 +657,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.48.bb63:
   ; GFX90A-NEXT:   successors: %bb.50(0x40000000), %bb.49(0x40000000)
-  ; GFX90A-NEXT:   liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
+  ; GFX90A-NEXT:   liveins: $vcc, $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59, $sgpr48_sgpr49
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr46_sgpr47 = S_MOV_B64 0
   ; GFX90A-NEXT:   S_CBRANCH_VCCNZ %bb.50, implicit $vcc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.49:
   ; GFX90A-NEXT:   successors: %bb.44(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr48_sgpr49 = S_MOV_B64 -1
   ; GFX90A-NEXT:   S_BRANCH %bb.44
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.50.bb68:
   ; GFX90A-NEXT:   successors: %bb.54(0x40000000), %bb.51(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr6, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -678,7 +680,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.51:
   ; GFX90A-NEXT:   successors: %bb.45(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr58_sgpr59
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr50_sgpr51 = S_MOV_B64 -1
   ; GFX90A-NEXT:   renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37
@@ -698,16 +700,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.52.bb80:
   ; GFX90A-NEXT:   successors: %bb.59(0x40000000), %bb.53(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   dead renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc
+  ; GFX90A-NEXT:   dead renamable $sgpr15 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def $scc
   ; GFX90A-NEXT:   renamable $vgpr10 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr11, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_SCC0 %bb.59, implicit killed $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.53:
   ; GFX90A-NEXT:   successors: %bb.61(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr52_sgpr53 = S_MOV_B64 0
   ; GFX90A-NEXT:   renamable $sgpr54_sgpr55 = S_MOV_B64 -1
@@ -726,7 +728,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.54.bb73:
   ; GFX90A-NEXT:   successors: %bb.52(0x40000000), %bb.55(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr60_sgpr61
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr3 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
   ; GFX90A-NEXT:   renamable $vgpr8 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
@@ -751,14 +753,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.55.Flow29:
   ; GFX90A-NEXT:   successors: %bb.45(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr62_sgpr63, implicit-def $scc
   ; GFX90A-NEXT:   S_BRANCH %bb.45
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.56.bb90:
   ; GFX90A-NEXT:   successors: %bb.60(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr30 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -778,7 +780,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.57:
   ; GFX90A-NEXT:   successors: %bb.7(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr40_sgpr41, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr24 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr20 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
@@ -823,7 +825,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr26_vgpr27 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.419, addrspace 3)
   ; GFX90A-NEXT:   renamable $vgpr0 = COPY renamable $sgpr21, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2_vgpr3 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.4, addrspace 3)
-  ; GFX90A-NEXT:   renamable $vgpr0 = COPY killed renamable $sgpr17, implicit $exec
+  ; GFX90A-NEXT:   renamable $vgpr0 = COPY killed renamable $sgpr15, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.420, addrspace 3)
   ; GFX90A-NEXT:   renamable $vgpr0 = COPY renamable $sgpr22, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr32_vgpr33 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from %ir.5, addrspace 3)
@@ -832,7 +834,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.59.bb85:
   ; GFX90A-NEXT:   successors: %bb.56(0x40000000), %bb.60(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57:0x000000000000000F, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr12 = V_OR_B32_e32 1, $vgpr10, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr13 = COPY renamable $vgpr11, implicit $exec
@@ -854,14 +856,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.60.Flow31:
   ; GFX90A-NEXT:   successors: %bb.61(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc
   ; GFX90A-NEXT:   renamable $sgpr54_sgpr55 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.61.Flow30:
   ; GFX90A-NEXT:   successors: %bb.55(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19:0x0000000000000003, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x0000000000000003, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr58_sgpr59 = S_XOR_B64 $exec, -1, implicit-def dead $scc
   ; GFX90A-NEXT:   renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc
@@ -873,7 +875,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.62.bb140:
   ; GFX90A-NEXT:   successors: %bb.68(0x40000000), %bb.63(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr24_sgpr25 = S_MOV_B64 -1
   ; GFX90A-NEXT:   renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -881,14 +883,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.63.Flow13:
   ; GFX90A-NEXT:   successors: %bb.64(0x40000000), %bb.66(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def dead $scc
   ; GFX90A-NEXT:   S_CBRANCH_VCCNZ %bb.66, implicit $vcc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.64.bb159:
   ; GFX90A-NEXT:   successors: %bb.67(0x40000000), %bb.65(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr6, implicit $exec
   ; GFX90A-NEXT:   $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
@@ -897,21 +899,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.65.Flow10:
   ; GFX90A-NEXT:   successors: %bb.66(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.66.Flow14:
   ; GFX90A-NEXT:   successors: %bb.8(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr68_sgpr69 = COPY $exec
   ; GFX90A-NEXT:   S_BRANCH %bb.8
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.67.bb161:
   ; GFX90A-NEXT:   successors: %bb.65(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr2 = V_OR_B32_e32 killed $vgpr27, killed $vgpr29, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr33, implicit $exec
@@ -930,7 +932,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.68.bb174:
   ; GFX90A-NEXT:   successors: %bb.72(0x40000000), %bb.69(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000F, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000F, $vgpr28_vgpr29:0x000000000000000F, $vgpr32_vgpr33:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $agpr0 = COPY killed renamable $vgpr14, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr34 = V_OR_B32_e32 1, $vgpr32, implicit $exec
@@ -947,14 +949,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.69.Flow:
   ; GFX90A-NEXT:   successors: %bb.70(0x40000000), %bb.71(0x40000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc
   ; GFX90A-NEXT:   S_CBRANCH_VCCNZ %bb.71, implicit $vcc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.70.bb186:
   ; GFX90A-NEXT:   successors: %bb.71(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
@@ -983,7 +985,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.71.Flow9:
   ; GFX90A-NEXT:   successors: %bb.63(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr24_sgpr25 = S_MOV_B64 0
   ; GFX90A-NEXT:   renamable $vgpr14 = COPY killed renamable $agpr0, implicit $exec
@@ -991,7 +993,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.72.bb196:
   ; GFX90A-NEXT:   successors: %bb.69(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GFX90A-NEXT:   liveins: $sgpr14, $sgpr16, $sgpr17, $vgpr7, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x0000000000000003, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x000000000000000C, $vgpr18_vgpr19:0x0000000000000003, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x0000000000000003, $vgpr24_vgpr25:0x0000000000000003, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x000000000000000C, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $vgpr2 = V_OR_B32_e32 $vgpr14, killed $vgpr24, implicit $exec
   ; GFX90A-NEXT:   renamable $vgpr22 = V_OR_B32_e32 killed $vgpr2, killed $vgpr22, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
index fe0b0188d2d37..697576edef12e 100644
--- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
@@ -305,6 +305,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v
 ; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
 ; SDAG-NEXT:    v_mov_b32_e32 v0, 2.0
+; SDAG-NEXT:    ; implicit-def: $sgpr15
 ; SDAG-NEXT:    s_mov_b32 s14, s16
 ; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; SDAG-NEXT:    v_add_f32_e32 v0, 1.0, v0
@@ -330,6 +331,7 @@ define amdgpu_kernel void @test_invoke() #0 personality ptr @__gxx_personality_v
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 2.0
+; GISEL-NEXT:    ; implicit-def: $sgpr15
 ; GISEL-NEXT:    s_mov_b32 s14, s16
 ; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GISEL-NEXT:    v_add_f32_e32 v0, 1.0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
index e2ca278d687be..ada817b402444 100644
--- a/llvm/test/CodeGen/AMDGPU/call-skip.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -5,6 +5,10 @@
 ; A call should be skipped if all lanes are zero, since we don't know
 ; what side effects should be avoided inside the call.
 define hidden void @func() #1 {
+; GCN-LABEL: func:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   ret void
 }
 
@@ -85,19 +89,20 @@ define amdgpu_kernel void @if_call_kernel() #0 {
 ; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; SDAG-NEXT:    s_mov_b32 s32, 0
 ; SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; SDAG-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; SDAG-NEXT:    s_and_saveexec_b64 s[18:19], vcc
 ; SDAG-NEXT:    s_cbranch_execz .LBB3_2
 ; SDAG-NEXT:  ; %bb.1: ; %call
 ; SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-NEXT:    s_mov_b32 s13, s15
 ; SDAG-NEXT:    s_getpc_b64 s[18:19]
 ; SDAG-NEXT:    s_add_u32 s18, s18, func at rel32@lo+4
 ; SDAG-NEXT:    s_addc_u32 s19, s19, func at rel32@hi+12
 ; SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
 ; SDAG-NEXT:    s_mov_b32 s12, s14
-; SDAG-NEXT:    s_mov_b32 s13, s15
 ; SDAG-NEXT:    s_mov_b32 s14, s16
+; SDAG-NEXT:    ; implicit-def: $sgpr15
 ; SDAG-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; SDAG-NEXT:  .LBB3_2: ; %end
 ; SDAG-NEXT:    s_endpgm
@@ -111,19 +116,20 @@ define amdgpu_kernel void @if_call_kernel() #0 {
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GISEL-NEXT:    s_mov_b32 s32, 0
 ; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_and_saveexec_b64 s[12:13], vcc
+; GISEL-NEXT:    s_and_saveexec_b64 s[18:19], vcc
 ; GISEL-NEXT:    s_cbranch_execz .LBB3_2
 ; GISEL-NEXT:  ; %bb.1: ; %call
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; GISEL-NEXT:    s_mov_b32 s13, s15
 ; GISEL-NEXT:    s_getpc_b64 s[18:19]
 ; GISEL-NEXT:    s_add_u32 s18, s18, func at rel32@lo+4
 ; GISEL-NEXT:    s_addc_u32 s19, s19, func at rel32@hi+12
 ; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
 ; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_mov_b32 s13, s15
 ; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    ; implicit-def: $sgpr15
 ; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GISEL-NEXT:  .LBB3_2: ; %end
 ; GISEL-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 38c20c7cf62d6..45836ff81f774 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -184,21 +184,22 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_bitcmp1_b32 s12, 0
-; GCN-NEXT:    s_cselect_b64 s[12:13], -1, 0
-; GCN-NEXT:    s_and_b64 vcc, exec, s[12:13]
+; GCN-NEXT:    s_cselect_b64 s[18:19], -1, 0
+; GCN-NEXT:    s_and_b64 vcc, exec, s[18:19]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB4_2
 ; GCN-NEXT:  ; %bb.1: ; %if.else
 ; GCN-NEXT:    s_add_u32 s8, s8, 8
 ; GCN-NEXT:    s_addc_u32 s9, s9, 0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT:    s_mov_b32 s13, s15
 ; GCN-NEXT:    s_getpc_b64 s[18:19]
 ; GCN-NEXT:    s_add_u32 s18, s18, func_v3i16 at rel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s19, s19, func_v3i16 at rel32@hi+12
 ; GCN-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_mov_b32 s13, s15
 ; GCN-NEXT:    s_mov_b32 s14, s16
+; GCN-NEXT:    ; implicit-def: $sgpr15
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GCN-NEXT:    s_branch .LBB4_3
 ; GCN-NEXT:  .LBB4_2:
@@ -235,21 +236,22 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_bitcmp1_b32 s12, 0
-; GCN-NEXT:    s_cselect_b64 s[12:13], -1, 0
-; GCN-NEXT:    s_and_b64 vcc, exec, s[12:13]
+; GCN-NEXT:    s_cselect_b64 s[18:19], -1, 0
+; GCN-NEXT:    s_and_b64 vcc, exec, s[18:19]
 ; GCN-NEXT:    s_cbranch_vccnz .LBB5_2
 ; GCN-NEXT:  ; %bb.1: ; %if.else
 ; GCN-NEXT:    s_add_u32 s8, s8, 8
 ; GCN-NEXT:    s_addc_u32 s9, s9, 0
 ; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
 ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN-NEXT:    s_mov_b32 s13, s15
 ; GCN-NEXT:    s_getpc_b64 s[18:19]
 ; GCN-NEXT:    s_add_u32 s18, s18, func_v3f16 at rel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s19, s19, func_v3f16 at rel32@hi+12
 ; GCN-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_mov_b32 s13, s15
 ; GCN-NEXT:    s_mov_b32 s14, s16
+; GCN-NEXT:    ; implicit-def: $sgpr15
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GCN-NEXT:    s_branch .LBB5_3
 ; GCN-NEXT:  .LBB5_2:
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
index 103ca48a7dc5f..d6e11e6af6d84 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll
@@ -386,6 +386,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -462,6 +463,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -528,6 +530,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -594,6 +597,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -649,6 +653,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v1, 1
@@ -688,16 +693,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB1_1: ; %ComputeLoop
@@ -751,6 +758,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -803,6 +811,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -885,6 +894,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -962,6 +972,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -1023,6 +1034,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -1078,16 +1090,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -1599,6 +1613,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -1675,6 +1690,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -1741,6 +1757,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -1807,6 +1824,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -1862,6 +1880,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v1, 1
@@ -1901,16 +1920,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB3_1: ; %ComputeLoop
@@ -1964,6 +1985,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -2016,6 +2038,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2098,6 +2121,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2175,6 +2199,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -2236,6 +2261,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2291,16 +2317,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -2872,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -2948,6 +2977,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -3014,6 +3044,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -3080,6 +3111,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -3135,6 +3167,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v1, 1
@@ -3174,16 +3207,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB5_1: ; %ComputeLoop
@@ -3237,6 +3272,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -3289,6 +3325,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -3371,6 +3408,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -3448,6 +3486,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -3509,6 +3548,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -3564,16 +3604,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -3641,6 +3683,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -3717,6 +3760,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -3783,6 +3827,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -3849,6 +3894,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -3904,6 +3950,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v1, 1
@@ -3943,16 +3990,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB6_1: ; %ComputeLoop
@@ -4006,6 +4055,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -4058,6 +4108,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -4140,6 +4191,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4217,6 +4269,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -4278,6 +4331,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4333,16 +4387,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -4913,6 +4969,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -4989,6 +5046,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -5055,6 +5113,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -5121,6 +5180,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -5176,6 +5236,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -5228,16 +5289,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v2, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB8_1: ; %ComputeLoop
@@ -5304,6 +5367,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -5356,6 +5420,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -5438,6 +5503,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5515,6 +5581,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -5576,6 +5643,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5644,16 +5712,18 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -6165,6 +6235,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -6246,6 +6317,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -6315,6 +6387,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -6384,6 +6457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -6442,6 +6516,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -6497,16 +6572,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -6575,6 +6652,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -6630,6 +6708,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6729,6 +6808,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6818,6 +6898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -6891,6 +6972,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6973,16 +7055,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -7592,6 +7676,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -7673,6 +7758,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -7742,6 +7828,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -7811,6 +7898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -7869,6 +7957,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -7924,16 +8013,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -8002,6 +8093,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -8057,6 +8149,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -8156,6 +8249,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -8245,6 +8339,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -8318,6 +8413,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -8400,16 +8496,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -9019,6 +9117,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -9100,6 +9199,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -9169,6 +9269,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -9238,6 +9339,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -9296,6 +9398,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -9351,16 +9454,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -9429,6 +9534,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -9484,6 +9590,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -9583,6 +9690,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -9672,6 +9780,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -9745,6 +9854,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -9827,16 +9937,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -9928,6 +10040,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -10009,6 +10122,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -10078,6 +10192,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -10147,6 +10262,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -10205,6 +10321,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -10260,16 +10377,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -10338,6 +10457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -10393,6 +10513,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -10492,6 +10613,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -10581,6 +10703,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -10654,6 +10777,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -10736,16 +10860,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -11355,6 +11481,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -11436,6 +11563,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -11505,6 +11633,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -11574,6 +11703,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -11632,6 +11762,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -11687,16 +11818,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -11765,6 +11898,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -11820,6 +11954,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -11919,6 +12054,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -12008,6 +12144,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -12081,6 +12218,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -12163,16 +12301,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
index 2160976599dd7..957ff4766e709 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll
@@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB1_1: ; %ComputeLoop
@@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -797,6 +806,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -1578,6 +1596,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB3_1: ; %ComputeLoop
@@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -2653,16 +2686,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB5_1: ; %ComputeLoop
@@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -2772,6 +2808,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
@@ -3690,6 +3735,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
@@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4047,6 +4098,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
@@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
@@ -5001,6 +5061,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -5146,6 +5209,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -6160,6 +6232,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
@@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
@@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -6467,6 +6545,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
index 029fb9c118344..97659df4f6496 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll
@@ -294,6 +294,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -374,6 +375,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -444,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -500,6 +503,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -545,6 +549,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -587,16 +592,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB1_1: ; %ComputeLoop
@@ -652,6 +659,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -706,6 +714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -797,6 +806,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -870,6 +880,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -925,6 +936,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -990,16 +1002,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -1327,6 +1341,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -1407,6 +1422,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -1477,6 +1493,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -1533,6 +1550,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -1578,6 +1596,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -1620,16 +1639,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB3_1: ; %ComputeLoop
@@ -1685,6 +1706,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -1739,6 +1761,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -1830,6 +1853,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -1903,6 +1927,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -1958,6 +1983,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2023,16 +2049,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -2360,6 +2388,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -2440,6 +2469,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -2510,6 +2540,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -2566,6 +2597,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -2611,6 +2643,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -2653,16 +2686,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB5_1: ; %ComputeLoop
@@ -2718,6 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -2772,6 +2808,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2863,6 +2900,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2936,6 +2974,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -2991,6 +3030,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -3056,16 +3096,18 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x7fc00000, v0, s0
@@ -3474,6 +3516,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -3559,6 +3602,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -3632,6 +3676,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
@@ -3690,6 +3735,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
@@ -3737,6 +3783,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -3798,16 +3845,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -3882,6 +3931,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -3939,6 +3989,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4047,6 +4098,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4131,6 +4183,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -4197,6 +4250,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4293,16 +4347,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -4738,6 +4794,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -4823,6 +4880,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -4896,6 +4954,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
@@ -4954,6 +5013,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
@@ -5001,6 +5061,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -5062,16 +5123,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -5146,6 +5209,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -5203,6 +5267,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5311,6 +5376,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5395,6 +5461,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -5461,6 +5528,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5557,16 +5625,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
@@ -6002,6 +6072,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -6087,6 +6158,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -6160,6 +6232,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v2, 0
@@ -6218,6 +6291,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v2, 0
@@ -6265,6 +6339,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -6326,16 +6401,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_mov_b32_e32 v5, 0x7ff80000
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -6410,6 +6487,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -6467,6 +6545,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6575,6 +6654,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6659,6 +6739,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -6725,6 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -6821,16 +6903,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_dual_mov_b32 v3, 0x7ff80000 :: v_dual_mov_b32 v2, 0
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v5, 0x7ff80000, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
index 3250d95bb0b7d..68e87b16c66fe 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll
@@ -446,6 +446,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -522,6 +523,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -588,6 +590,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -654,6 +657,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -709,6 +713,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -761,16 +766,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v2, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB1_1: ; %ComputeLoop
@@ -837,6 +844,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -889,6 +897,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -971,6 +980,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -1048,6 +1058,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -1109,6 +1120,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -1177,16 +1189,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -1771,6 +1785,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -1847,6 +1862,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -1913,6 +1929,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -1979,6 +1996,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -2034,6 +2052,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -2086,16 +2105,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v2, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB3_1: ; %ComputeLoop
@@ -2162,6 +2183,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -2214,6 +2236,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -2296,6 +2319,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2373,6 +2397,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -2434,6 +2459,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -2502,16 +2528,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -3096,6 +3124,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -3172,6 +3201,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -3238,6 +3268,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -3304,6 +3335,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -3359,6 +3391,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -3411,16 +3444,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v2, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB5_1: ; %ComputeLoop
@@ -3487,6 +3522,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -3539,6 +3575,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -3621,6 +3658,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -3698,6 +3736,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -3759,6 +3798,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -3827,16 +3867,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -3917,6 +3959,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -3993,6 +4036,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -4059,6 +4103,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -4125,6 +4170,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -4180,6 +4226,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -4232,16 +4279,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v2, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB6_1: ; %ComputeLoop
@@ -4308,6 +4357,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -4360,6 +4410,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -4442,6 +4493,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4519,6 +4571,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -4580,6 +4633,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -4648,16 +4702,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -5241,6 +5297,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -5317,6 +5374,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    s_mov_b64 s[0:1], exec
@@ -5383,6 +5441,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -5449,6 +5508,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -5504,6 +5564,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_bfrev_b32_e32 v2, 1
@@ -5556,16 +5617,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v2, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX1132-NEXT:  .LBB8_1: ; %ComputeLoop
@@ -5632,6 +5695,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -5684,6 +5748,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    v_mbcnt_lo_u32_b32 v1, exec_lo, 0
@@ -5766,6 +5831,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5843,6 +5909,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -5904,6 +5971,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -5972,16 +6040,18 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v1, 1
 ; GFX1132-DPP-NEXT:    v_cndmask_b32_e64 v2, 0x80000000, v0, s0
@@ -6493,6 +6563,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -6574,6 +6645,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -6643,6 +6715,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -6712,6 +6785,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -6770,6 +6844,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -6825,16 +6900,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -6903,6 +6980,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -6958,6 +7036,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -7057,6 +7136,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -7146,6 +7226,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -7219,6 +7300,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -7301,16 +7383,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -7919,6 +8003,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -8000,6 +8085,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -8069,6 +8155,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -8138,6 +8225,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -8196,6 +8284,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -8251,16 +8340,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -8329,6 +8420,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -8384,6 +8476,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -8483,6 +8576,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -8572,6 +8666,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -8645,6 +8740,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -8727,16 +8823,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -9346,6 +9444,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -9427,6 +9526,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -9496,6 +9596,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -9565,6 +9666,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -9623,6 +9725,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -9678,16 +9781,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -9756,6 +9861,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -9811,6 +9917,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -9910,6 +10017,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -9999,6 +10107,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -10072,6 +10181,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -10154,16 +10264,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.double.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.double.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -10255,6 +10367,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -10336,6 +10449,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -10405,6 +10519,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -10474,6 +10589,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -10532,6 +10648,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -10587,16 +10704,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -10665,6 +10784,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -10720,6 +10840,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -10819,6 +10940,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -10908,6 +11030,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -10981,6 +11104,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -11063,16 +11187,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
@@ -11681,6 +11807,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX7LESS-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -11762,6 +11889,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX9-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
@@ -11831,6 +11959,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1064-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-NEXT:    s_mov_b32 s32, 0
+; GFX1064-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-NEXT:    v_mov_b32_e32 v4, 0
@@ -11900,6 +12029,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1032-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-NEXT:    s_mov_b32 s32, 0
+; GFX1032-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-NEXT:    v_mov_b32_e32 v4, 0
@@ -11958,6 +12088,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1164-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s32, 0
+; GFX1164-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-NEXT:    v_mov_b32_e32 v4, 0
@@ -12013,16 +12144,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1132-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-NEXT:    s_mov_b32 s16, s15
+; GFX1132-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b32 s12, s13
-; GFX1132-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s13, s14
-; GFX1132-NEXT:    s_mov_b32 s14, s15
+; GFX1132-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-NEXT:    s_mov_b32 s32, 0
+; GFX1132-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX1132-NEXT:    v_bfrev_b32_e32 v5, 1
 ; GFX1132-NEXT:    s_mov_b32 s0, exec_lo
@@ -12091,6 +12224,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX7LESS-DPP-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX7LESS-DPP-NEXT:    v_or_b32_e32 v31, v0, v2
+; GFX7LESS-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX7LESS-DPP-NEXT:    s_mov_b64 s[0:1], s[48:49]
@@ -12146,6 +12280,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX9-DPP-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX9-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX9-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX9-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -12245,6 +12380,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1064-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1064-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1064-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1064-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1064-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -12334,6 +12470,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX1032-DPP-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX1032-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1032-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1032-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1032-DPP-NEXT:    s_or_saveexec_b32 s0, -1
@@ -12407,6 +12544,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1164-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1164-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1164-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1164-DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX1164-DPP-NEXT:    s_or_saveexec_b64 s[0:1], -1
@@ -12489,16 +12627,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau
 ; GFX1132-DPP-NEXT:    s_getpc_b64 s[4:5]
 ; GFX1132-DPP-NEXT:    s_add_u32 s4, s4, div.float.value at gotpcrel32@lo+4
 ; GFX1132-DPP-NEXT:    s_addc_u32 s5, s5, div.float.value at gotpcrel32@hi+12
+; GFX1132-DPP-NEXT:    s_mov_b32 s16, s15
+; GFX1132-DPP-NEXT:    s_load_b64 s[18:19], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b32 s12, s13
-; GFX1132-DPP-NEXT:    s_load_b64 s[16:17], s[4:5], 0x0
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX1132-DPP-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX1132-DPP-NEXT:    s_mov_b32 s13, s14
-; GFX1132-DPP-NEXT:    s_mov_b32 s14, s15
+; GFX1132-DPP-NEXT:    s_mov_b32 s14, s16
 ; GFX1132-DPP-NEXT:    s_mov_b32 s32, 0
+; GFX1132-DPP-NEXT:    ; implicit-def: $sgpr15
 ; GFX1132-DPP-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GFX1132-DPP-NEXT:    s_swappc_b64 s[30:31], s[18:19]
 ; GFX1132-DPP-NEXT:    s_or_saveexec_b32 s0, -1
 ; GFX1132-DPP-NEXT:    v_bfrev_b32_e32 v3, 1
 ; GFX1132-DPP-NEXT:    v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index d895a75de6e81..880cfbf12b931 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -60,8 +60,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_load_b32 s24, s[18:19], 0x24
 ; GFX11-NEXT:    s_mov_b32 s12, s13
 ; GFX11-NEXT:    s_mov_b64 s[10:11], s[6:7]
-; GFX11-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
+; GFX11-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX11-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX11-NEXT:    s_mov_b32 s20, 0
 ; GFX11-NEXT:    s_mov_b32 s0, -1
@@ -86,6 +86,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_getpc_b64 s[0:1]
 ; GFX11-NEXT:    s_add_u32 s0, s0, f0 at gotpcrel32@lo+4
 ; GFX11-NEXT:    s_addc_u32 s1, s1, f0 at gotpcrel32@hi+12
+; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_mov_b32 s13, s14
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_mov_b32 s26, s14
@@ -192,7 +193,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_addc_u32 s1, s1, f0 at gotpcrel32@hi+12
 ; GFX11-NEXT:    s_mov_b32 s13, s14
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT:    s_mov_b32 s14, s15
+; GFX11-NEXT:    s_mov_b32 s14, s18
+; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    s_or_b32 s20, s20, exec_lo
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 0e250baa86090..951bf1e6eeac8 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -109,6 +109,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s12, s51
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_bfe_u32 v0, v0, v1, 4
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 12
@@ -194,6 +195,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
 ; CHECK-NEXT:    v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CHECK-NEXT:    ds_write_b32 v0, v57
@@ -220,6 +222,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s14, s33
 ; CHECK-NEXT:    v_add_nc_u32_e32 v59, 1, v57
 ; CHECK-NEXT:    v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CHECK-NEXT:    ds_write_b32 v0, v59
@@ -246,6 +249,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s14, s33
 ; CHECK-NEXT:    v_add_nc_u32_e32 v59, 2, v57
 ; CHECK-NEXT:    v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CHECK-NEXT:    ds_write_b32 v0, v59
@@ -272,6 +276,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s14, s33
 ; CHECK-NEXT:    v_add_nc_u32_e32 v58, 3, v57
 ; CHECK-NEXT:    v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CHECK-NEXT:    ds_write_b32 v0, v58
@@ -322,6 +327,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
 ; CHECK-NEXT:    v_add_nc_u32_e32 v47, 1, v47
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CHECK-NEXT:    ds_write_b32 v0, v57
@@ -356,6 +362,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s12, s51
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
 ; CHECK-NEXT:    s_mov_b32 s4, exec_lo
@@ -381,6 +388,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s12, s51
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_add_co_u32 v41, vcc_lo, v0, v41
 ; CHECK-NEXT:    v_add_co_ci_u32_e64 v0, null, 0, v1, vcc_lo
@@ -441,6 +449,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    v_or3_b32 v73, v2, v0, v1
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    v_lshrrev_b32_e32 v0, 1, v73
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 2, v73
 ; CHECK-NEXT:    v_and_b32_e32 v0, 0x7fffc, v0
@@ -501,6 +510,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s12, s51
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    s_branch .LBB0_27
 ; CHECK-NEXT:  .LBB0_33:
@@ -854,6 +864,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
 ; CHECK-NEXT:    s_mov_b32 s12, s51
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    v_bfe_u32 v0, v0, v1, 4
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 12
@@ -944,6 +955,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
 ; CHECK-NEXT:    v_add_nc_u32_e32 v43, 1, v43
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CHECK-NEXT:    ds_write_b32 v0, v56
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 9585c486aeb9e..22b9a9144c68d 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -902,6 +902,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(
 ; GFX908-NEXT:    s_mov_b64 s[0:1], s[52:53]
 ; GFX908-NEXT:    v_or3_b32 v31, v0, v1, v2
 ; GFX908-NEXT:    s_mov_b64 s[2:3], s[54:55]
+; GFX908-NEXT:    ; implicit-def: $sgpr15
 ; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX908-NEXT:  .LBB6_2: ; %bb3
diff --git a/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
new file mode 100644
index 0000000000000..788065fd1391d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/multi-use-implicit-def.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+
+# Ensure processing an IMPLICIT_DEF of a physreg updates all uses
+# before removing the IMPLICIT_DEF.  -verify-machineinstrs will
+# fail otherwise.
+
+# RUN: llc -mtriple=amdgcn -mcpu=gfx802  -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+
+name: implicit_def_multiple_use
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: implicit_def_multiple_use
+    ; CHECK: $vgpr1_lo16 = COPY undef $vgpr0_hi16
+    ; CHECK-NEXT: $vgpr1_hi16 = COPY undef $vgpr0_lo16
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $vgpr0_hi16
+    $vgpr1_hi16 = COPY $vgpr0_lo16
+    S_ENDPGM 0
+...
+
+# IMPLICIT_DEF processing Will not search across basic-blocks for uses.  IMPLICIT_DEF must not be deleted.
+---
+
+name: implicit_def_cannot_find_all_uses
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: implicit_def_cannot_find_all_uses
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = IMPLICIT_DEF
+  ; CHECK-NEXT:   $vgpr1_lo16 = COPY undef $vgpr0_hi16
+  ; CHECK-NEXT:   $vgpr1_hi16 = COPY undef $vgpr0_lo16
+  ; CHECK-NEXT:   $scc = IMPLICIT_DEF
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.0, implicit undef $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    $vgpr0 = IMPLICIT_DEF
+    $vgpr1_lo16 = COPY $vgpr0_hi16
+    $vgpr1_hi16 = COPY $vgpr0_lo16
+    $scc = IMPLICIT_DEF
+    S_CBRANCH_SCC1 %bb.0, implicit $scc
+  bb.1:
+...
diff --git a/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll b/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll
index b0c672d3c55de..d7a6cce1fea33 100644
--- a/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll
+++ b/llvm/test/CodeGen/AMDGPU/phi-av-pressure.ll
@@ -30,6 +30,7 @@ define amdgpu_kernel void @main(i1 %arg, ptr %ptr, ptr addrspace(1) %ptr1, ptr a
 ; GFX950-NEXT:    s_mov_b64 s[6:7], s[2:3]
 ; GFX950-NEXT:    v_mov_b32_e32 v31, v0
 ; GFX950-NEXT:    s_mov_b32 s32, 0
+; GFX950-NEXT:    ; implicit-def: $sgpr15
 ; GFX950-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX950-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX950-NEXT:    v_lshl_add_u64 v[10:11], v[0:1], 3, s[38:39]
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index a2b0f4d56ebea..7d140844278bb 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -362,6 +362,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, 0
+; GFX8-NEXT:    ; implicit-def: $sgpr15
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 17, v0
@@ -489,6 +490,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX900-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX900-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX900-NEXT:    s_mov_b32 s32, 0
+; GFX900-NEXT:    ; implicit-def: $sgpr15
 ; GFX900-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX900-NEXT:    v_and_b32_e32 v1, 0xff, v0
@@ -599,6 +601,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX10-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX10-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX10-NEXT:    s_mov_b32 s32, 0
+; GFX10-NEXT:    ; implicit-def: $sgpr15
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 17, v0
@@ -711,6 +714,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX90A-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    ; implicit-def: $sgpr15
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX90A-NEXT:    v_and_b32_e32 v1, 0xff, v0
@@ -812,6 +816,7 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1)  %buffer) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_load_b64 s[34:35], s[4:5], 0x24
 ; GFX11-NEXT:    s_mov_b32 s32, 0
+; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v1, 17, v0
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index d33e94809b326..db04d8c1c2678 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -25,6 +25,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
 ; MUBUF-NEXT:    s_add_u32 s4, s4, svm_eval_nodes at rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, svm_eval_nodes at rel32@hi+12
 ; MUBUF-NEXT:    s_mov_b32 s32, 0xc0000
+; MUBUF-NEXT:    ; implicit-def: $sgpr15
 ; MUBUF-NEXT:    s_waitcnt lgkmcnt(0)
 ; MUBUF-NEXT:    v_mov_b32_e32 v0, s0
 ; MUBUF-NEXT:    s_mov_b64 s[0:1], s[36:37]
@@ -61,6 +62,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
 ; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
 ; FLATSCR-NEXT:    s_add_u32 s0, s0, svm_eval_nodes at rel32@lo+4
 ; FLATSCR-NEXT:    s_addc_u32 s1, s1, svm_eval_nodes at rel32@hi+12
+; FLATSCR-NEXT:    ; implicit-def: $sgpr15
 ; FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, s2
 ; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
@@ -89,6 +91,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
 ; MUBUF11-NEXT:    s_add_u32 s0, s0, svm_eval_nodes at rel32@lo+4
 ; MUBUF11-NEXT:    s_addc_u32 s1, s1, svm_eval_nodes at rel32@hi+12
 ; MUBUF11-NEXT:    s_movk_i32 s32, 0x6000
+; MUBUF11-NEXT:    ; implicit-def: $sgpr15
 ; MUBUF11-NEXT:    s_waitcnt lgkmcnt(0)
 ; MUBUF11-NEXT:    v_mov_b32_e32 v0, s2
 ; MUBUF11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
@@ -116,6 +119,7 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr
 ; FLATSCR11-NEXT:    s_add_u32 s0, s0, svm_eval_nodes at rel32@lo+4
 ; FLATSCR11-NEXT:    s_addc_u32 s1, s1, svm_eval_nodes at rel32@hi+12
 ; FLATSCR11-NEXT:    s_movk_i32 s32, 0x6000
+; FLATSCR11-NEXT:    ; implicit-def: $sgpr15
 ; FLATSCR11-NEXT:    s_waitcnt lgkmcnt(0)
 ; FLATSCR11-NEXT:    v_mov_b32_e32 v0, s2
 ; FLATSCR11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
index 251908b1b0f94..03b653782e5ca 100644
--- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll
@@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    global_store_dword v[44:45], v42, off
 ; GLOBALNESS1-NEXT:    s_waitcnt lgkmcnt(0)
 ; GLOBALNESS1-NEXT:    global_load_dword v2, v42, s[52:53]
-; GLOBALNESS1-NEXT:    s_mov_b64 s[48:49], s[4:5]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[38:39], s[4:5]
 ; GLOBALNESS1-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x18
 ; GLOBALNESS1-NEXT:    s_load_dword s7, s[8:9], 0x20
 ; GLOBALNESS1-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
@@ -68,7 +68,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GLOBALNESS1-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GLOBALNESS1-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS1-NEXT:    s_mov_b64 s[38:39], s[8:9]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[48:49], s[8:9]
 ; GLOBALNESS1-NEXT:    v_cmp_ne_u32_e64 s[8:9], 1, v1
 ; GLOBALNESS1-NEXT:    ; implicit-def: $vgpr57 : SGPR spill to VGPR lane
 ; GLOBALNESS1-NEXT:    v_cmp_ne_u32_e64 s[66:67], 1, v0
@@ -127,21 +127,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    ; =>This Loop Header: Depth=1
 ; GLOBALNESS1-NEXT:    ; Child Loop BB1_16 Depth 2
 ; GLOBALNESS1-NEXT:    flat_load_dword v40, v[46:47]
-; GLOBALNESS1-NEXT:    s_add_u32 s8, s38, 40
+; GLOBALNESS1-NEXT:    s_add_u32 s8, s48, 40
 ; GLOBALNESS1-NEXT:    buffer_store_dword v42, off, s[0:3], 0
 ; GLOBALNESS1-NEXT:    flat_load_dword v56, v[46:47]
-; GLOBALNESS1-NEXT:    s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT:    s_addc_u32 s9, s49, 0
 ; GLOBALNESS1-NEXT:    s_getpc_b64 s[4:5]
 ; GLOBALNESS1-NEXT:    s_add_u32 s4, s4, wobble at gotpcrel32@lo+4
 ; GLOBALNESS1-NEXT:    s_addc_u32 s5, s5, wobble at gotpcrel32@hi+12
 ; GLOBALNESS1-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; GLOBALNESS1-NEXT:    s_mov_b32 s12, s84
 ; GLOBALNESS1-NEXT:    s_mov_b32 s13, s83
 ; GLOBALNESS1-NEXT:    s_mov_b32 s14, s82
 ; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS1-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS1-NEXT:    s_waitcnt lgkmcnt(0)
 ; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS1-NEXT:    s_and_b64 vcc, exec, s[70:71]
@@ -241,13 +242,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_15
 ; GLOBALNESS1-NEXT:  ; %bb.22: ; %bb55.i
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS1-NEXT:    s_add_u32 s70, s38, 40
-; GLOBALNESS1-NEXT:    s_addc_u32 s71, s39, 0
+; GLOBALNESS1-NEXT:    s_add_u32 s70, s48, 40
+; GLOBALNESS1-NEXT:    s_addc_u32 s71, s49, 0
 ; GLOBALNESS1-NEXT:    s_getpc_b64 s[4:5]
 ; GLOBALNESS1-NEXT:    s_add_u32 s4, s4, wobble at gotpcrel32@lo+4
 ; GLOBALNESS1-NEXT:    s_addc_u32 s5, s5, wobble at gotpcrel32@hi+12
 ; GLOBALNESS1-NEXT:    s_load_dwordx2 s[54:55], s[4:5], 0x0
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[8:9], s[70:71]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
@@ -257,7 +258,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
 ; GLOBALNESS1-NEXT:    s_waitcnt lgkmcnt(0)
 ; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[54:55]
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[8:9], s[70:71]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
@@ -266,6 +267,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    s_mov_b32 s14, s82
 ; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
 ; GLOBALNESS1-NEXT:    global_store_dwordx2 v[44:45], v[58:59], off
+; GLOBALNESS1-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[54:55]
 ; GLOBALNESS1-NEXT:    s_and_saveexec_b64 s[4:5], s[96:97]
 ; GLOBALNESS1-NEXT:    s_cbranch_execz .LBB1_14
@@ -275,7 +277,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    s_branch .LBB1_14
 ; GLOBALNESS1-NEXT:  .LBB1_24: ; %Flow23
 ; GLOBALNESS1-NEXT:    ; in Loop: Header=BB1_4 Depth=1
-; GLOBALNESS1-NEXT:    s_load_dwordx4 s[4:7], s[38:39], 0x0
+; GLOBALNESS1-NEXT:    s_load_dwordx4 s[4:7], s[48:49], 0x0
 ; GLOBALNESS1-NEXT:    v_readlane_b32 s70, v57, 8
 ; GLOBALNESS1-NEXT:    v_readlane_b32 s8, v57, 10
 ; GLOBALNESS1-NEXT:    v_pk_mov_b32 v[0:1], 0, 0
@@ -307,36 +309,38 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], -1
 ; GLOBALNESS1-NEXT:    s_cbranch_vccz .LBB1_31
 ; GLOBALNESS1-NEXT:  ; %bb.30: ; %bb7.i.i
-; GLOBALNESS1-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT:    s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT:    s_add_u32 s8, s48, 40
+; GLOBALNESS1-NEXT:    s_addc_u32 s9, s49, 0
 ; GLOBALNESS1-NEXT:    s_getpc_b64 s[16:17]
 ; GLOBALNESS1-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
 ; GLOBALNESS1-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; GLOBALNESS1-NEXT:    s_mov_b32 s12, s84
 ; GLOBALNESS1-NEXT:    s_mov_b32 s13, s83
 ; GLOBALNESS1-NEXT:    s_mov_b32 s14, s82
 ; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS1-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], 0
 ; GLOBALNESS1-NEXT:  .LBB1_31: ; %Flow
 ; GLOBALNESS1-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS1-NEXT:    s_cbranch_vccnz .LBB1_33
 ; GLOBALNESS1-NEXT:  ; %bb.32: ; %bb11.i.i
-; GLOBALNESS1-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS1-NEXT:    s_addc_u32 s9, s39, 0
+; GLOBALNESS1-NEXT:    s_add_u32 s8, s48, 40
+; GLOBALNESS1-NEXT:    s_addc_u32 s9, s49, 0
 ; GLOBALNESS1-NEXT:    s_getpc_b64 s[16:17]
 ; GLOBALNESS1-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
 ; GLOBALNESS1-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
-; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS1-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS1-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; GLOBALNESS1-NEXT:    s_mov_b32 s12, s84
 ; GLOBALNESS1-NEXT:    s_mov_b32 s13, s83
 ; GLOBALNESS1-NEXT:    s_mov_b32 s14, s82
 ; GLOBALNESS1-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS1-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS1-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS1-NEXT:  .LBB1_33: ; %UnifiedUnreachableBlock
 ;
@@ -350,7 +354,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    global_store_dword v[44:45], v42, off
 ; GLOBALNESS0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GLOBALNESS0-NEXT:    global_load_dword v2, v42, s[52:53]
-; GLOBALNESS0-NEXT:    s_mov_b64 s[48:49], s[4:5]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[38:39], s[4:5]
 ; GLOBALNESS0-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x18
 ; GLOBALNESS0-NEXT:    s_load_dword s7, s[8:9], 0x20
 ; GLOBALNESS0-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
@@ -379,7 +383,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
 ; GLOBALNESS0-NEXT:    s_cselect_b64 s[4:5], -1, 0
 ; GLOBALNESS0-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
-; GLOBALNESS0-NEXT:    s_mov_b64 s[38:39], s[8:9]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[48:49], s[8:9]
 ; GLOBALNESS0-NEXT:    v_cmp_ne_u32_e64 s[8:9], 1, v1
 ; GLOBALNESS0-NEXT:    ; implicit-def: $vgpr57 : SGPR spill to VGPR lane
 ; GLOBALNESS0-NEXT:    v_cmp_ne_u32_e64 s[66:67], 1, v0
@@ -438,21 +442,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    ; =>This Loop Header: Depth=1
 ; GLOBALNESS0-NEXT:    ; Child Loop BB1_16 Depth 2
 ; GLOBALNESS0-NEXT:    flat_load_dword v40, v[46:47]
-; GLOBALNESS0-NEXT:    s_add_u32 s8, s38, 40
+; GLOBALNESS0-NEXT:    s_add_u32 s8, s48, 40
 ; GLOBALNESS0-NEXT:    buffer_store_dword v42, off, s[0:3], 0
 ; GLOBALNESS0-NEXT:    flat_load_dword v56, v[46:47]
-; GLOBALNESS0-NEXT:    s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT:    s_addc_u32 s9, s49, 0
 ; GLOBALNESS0-NEXT:    s_getpc_b64 s[4:5]
 ; GLOBALNESS0-NEXT:    s_add_u32 s4, s4, wobble at gotpcrel32@lo+4
 ; GLOBALNESS0-NEXT:    s_addc_u32 s5, s5, wobble at gotpcrel32@hi+12
 ; GLOBALNESS0-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; GLOBALNESS0-NEXT:    s_mov_b32 s12, s82
 ; GLOBALNESS0-NEXT:    s_mov_b32 s13, s71
 ; GLOBALNESS0-NEXT:    s_mov_b32 s14, s70
 ; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS0-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS0-NEXT:    s_and_b64 vcc, exec, s[84:85]
@@ -553,13 +558,13 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_15
 ; GLOBALNESS0-NEXT:  ; %bb.22: ; %bb55.i
 ; GLOBALNESS0-NEXT:    ; in Loop: Header=BB1_16 Depth=2
-; GLOBALNESS0-NEXT:    s_add_u32 s84, s38, 40
-; GLOBALNESS0-NEXT:    s_addc_u32 s85, s39, 0
+; GLOBALNESS0-NEXT:    s_add_u32 s84, s48, 40
+; GLOBALNESS0-NEXT:    s_addc_u32 s85, s49, 0
 ; GLOBALNESS0-NEXT:    s_getpc_b64 s[4:5]
 ; GLOBALNESS0-NEXT:    s_add_u32 s4, s4, wobble at gotpcrel32@lo+4
 ; GLOBALNESS0-NEXT:    s_addc_u32 s5, s5, wobble at gotpcrel32@hi+12
 ; GLOBALNESS0-NEXT:    s_load_dwordx2 s[54:55], s[4:5], 0x0
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[8:9], s[84:85]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
@@ -569,7 +574,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
 ; GLOBALNESS0-NEXT:    s_waitcnt lgkmcnt(0)
 ; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[54:55]
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[8:9], s[84:85]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
@@ -578,6 +583,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    s_mov_b32 s14, s70
 ; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
 ; GLOBALNESS0-NEXT:    global_store_dwordx2 v[44:45], v[58:59], off
+; GLOBALNESS0-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[54:55]
 ; GLOBALNESS0-NEXT:    s_and_saveexec_b64 s[4:5], s[96:97]
 ; GLOBALNESS0-NEXT:    s_cbranch_execz .LBB1_14
@@ -617,36 +623,38 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], -1
 ; GLOBALNESS0-NEXT:    s_cbranch_vccz .LBB1_31
 ; GLOBALNESS0-NEXT:  ; %bb.30: ; %bb7.i.i
-; GLOBALNESS0-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT:    s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT:    s_add_u32 s8, s48, 40
+; GLOBALNESS0-NEXT:    s_addc_u32 s9, s49, 0
 ; GLOBALNESS0-NEXT:    s_getpc_b64 s[16:17]
 ; GLOBALNESS0-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
 ; GLOBALNESS0-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; GLOBALNESS0-NEXT:    s_mov_b32 s12, s82
 ; GLOBALNESS0-NEXT:    s_mov_b32 s13, s71
 ; GLOBALNESS0-NEXT:    s_mov_b32 s14, s70
 ; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS0-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], 0
 ; GLOBALNESS0-NEXT:  .LBB1_31: ; %Flow
 ; GLOBALNESS0-NEXT:    s_andn2_b64 vcc, exec, s[4:5]
 ; GLOBALNESS0-NEXT:    s_cbranch_vccnz .LBB1_33
 ; GLOBALNESS0-NEXT:  ; %bb.32: ; %bb11.i.i
-; GLOBALNESS0-NEXT:    s_add_u32 s8, s38, 40
-; GLOBALNESS0-NEXT:    s_addc_u32 s9, s39, 0
+; GLOBALNESS0-NEXT:    s_add_u32 s8, s48, 40
+; GLOBALNESS0-NEXT:    s_addc_u32 s9, s49, 0
 ; GLOBALNESS0-NEXT:    s_getpc_b64 s[16:17]
 ; GLOBALNESS0-NEXT:    s_add_u32 s16, s16, widget at rel32@lo+4
 ; GLOBALNESS0-NEXT:    s_addc_u32 s17, s17, widget at rel32@hi+12
-; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[48:49]
+; GLOBALNESS0-NEXT:    s_mov_b64 s[4:5], s[38:39]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[6:7], s[36:37]
 ; GLOBALNESS0-NEXT:    s_mov_b64 s[10:11], s[34:35]
 ; GLOBALNESS0-NEXT:    s_mov_b32 s12, s82
 ; GLOBALNESS0-NEXT:    s_mov_b32 s13, s71
 ; GLOBALNESS0-NEXT:    s_mov_b32 s14, s70
 ; GLOBALNESS0-NEXT:    v_mov_b32_e32 v31, v41
+; GLOBALNESS0-NEXT:    ; implicit-def: $sgpr15
 ; GLOBALNESS0-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GLOBALNESS0-NEXT:  .LBB1_33: ; %UnifiedUnreachableBlock
 bb:
diff --git a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
index 3558c4e1d0c85..485d7d3e75b62 100644
--- a/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
+++ b/llvm/test/CodeGen/AMDGPU/undef-handling-crash-in-ra.ll
@@ -66,6 +66,7 @@ define amdgpu_kernel void @foo(ptr addrspace(5) %ptr5, ptr %p0, double %v0, <4 x
 ; CHECK-NEXT:    flat_store_dwordx2 v[44:45], v[58:59]
 ; CHECK-NEXT:    flat_store_dwordx2 v[56:57], v[62:63]
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[54:55]
 ; CHECK-NEXT:    flat_load_dwordx2 v[0:1], v[46:47] glc
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
index eba9faae5fdb1..8206fd0b4fe13 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -150,6 +150,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-NEXT:    extsh r9, r3
 ; CHECK-NEXT:    extsw r6, r28
 ; CHECK-NEXT:    li r5, 0
+; CHECK-NEXT:    # implicit-def: $f2
 ; CHECK-NEXT:    li r7, 0
 ; CHECK-NEXT:    std r30, 104(r1)
 ; CHECK-NEXT:    std r29, 96(r1)
@@ -308,6 +309,7 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-BE-NEXT:    extsh r9, r3
 ; CHECK-BE-NEXT:    extsw r6, r28
 ; CHECK-BE-NEXT:    li r5, 0
+; CHECK-BE-NEXT:    # implicit-def: $f2
 ; CHECK-BE-NEXT:    li r7, 0
 ; CHECK-BE-NEXT:    std r30, 120(r1)
 ; CHECK-BE-NEXT:    std r29, 112(r1)
diff --git a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
index 3803ac82458bd..81b640d17c620 100644
--- a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
+++ b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
@@ -24,14 +24,22 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 {
 ; CHECK-NEXT:    addi s0, sp, 32
 ; CHECK-NEXT:    .cfi_def_cfa s0, 0
 ; CHECK-NEXT:    .cfi_remember_state
-; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:  .Ltmp0: # EH_LABEL
 ; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    # implicit-def: $x10
+; CHECK-NEXT:    # implicit-def: $x11
+; CHECK-NEXT:    # implicit-def: $x12
+; CHECK-NEXT:    # implicit-def: $x13
+; CHECK-NEXT:    # implicit-def: $x14
+; CHECK-NEXT:    # implicit-def: $x15
+; CHECK-NEXT:    # implicit-def: $x16
+; CHECK-NEXT:    # implicit-def: $x17
 ; CHECK-NEXT:    call _Z3fooiiiiiiiiiiPi
 ; CHECK-NEXT:    addi sp, sp, 32
-; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:  .Ltmp1: # EH_LABEL
 ; CHECK-NEXT:  # %bb.1: # %try.cont.unreachable
 ; CHECK-NEXT:  .LBB0_2: # %lpad
-; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:  .Ltmp2: # EH_LABEL
 ; CHECK-NEXT:    sext.w a1, a1
 ; CHECK-NEXT:    li a2, 1
 ; CHECK-NEXT:    bne a1, a2, .LBB0_4
diff --git a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
index 17027e2e4e5b4..8e2af5b44b3a9 100644
--- a/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
@@ -13,6 +13,7 @@ define i32 @test(i32 %n) nounwind {
 ; CHECK-NEXT:  .LBB0_1: @ %bb
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    bl f
+; CHECK-NEXT:    @ implicit-def: $r1
 ; CHECK-NEXT:    bl g
 ; CHECK-NEXT:    subs r4, #1
 ; CHECK-NEXT:    bne .LBB0_1
@@ -58,6 +59,7 @@ define i32 @test_dead_cycle(i32 %n) nounwind {
 ; CHECK-NEXT:  @ %bb.2: @ %bb1
 ; CHECK-NEXT:    @ in Loop: Header=BB1_1 Depth=1
 ; CHECK-NEXT:    bl f
+; CHECK-NEXT:    @ implicit-def: $r1
 ; CHECK-NEXT:    bl g
 ; CHECK-NEXT:  .LBB1_3: @ %bb2
 ; CHECK-NEXT:    @ in Loop: Header=BB1_1 Depth=1
diff --git a/llvm/test/CodeGen/X86/issue76416.ll b/llvm/test/CodeGen/X86/issue76416.ll
index 7193e54a6ad55..14786e5040da4 100644
--- a/llvm/test/CodeGen/X86/issue76416.ll
+++ b/llvm/test/CodeGen/X86/issue76416.ll
@@ -26,6 +26,7 @@ define dso_local void @vga_load_state() #0 {
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_4: # %for.cond1
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    # implicit-def: $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    movq vga_load_state_p(%rip), %rax
diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
index bd997d1647766..09d1dd9ccef95 100644
--- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
+++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
@@ -24,6 +24,7 @@ define void @PR24199(i32 %a0) {
 ; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:  .LBB0_3: # %if.end
 ; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    # implicit-def: $rdi
 ; CHECK-NEXT:    callq foo at PLT
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/multi-use-implicit-def.mir b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
new file mode 100644
index 0000000000000..d762e80792505
--- /dev/null
+++ b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
@@ -0,0 +1,15 @@
+# Ensure processing an IMPLICIT_DEF of a physreg updates all uses
+# before removing the IMPLICIT_DEF.  -verify-machineinstrs will
+# fail otherwise.
+
+# RUN: llc -x mir < %s -verify-machineinstrs | FileCheck %s
+---
+# CHECK-LABEL: implicit_def:
+name: implicit_def
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    $eax = IMPLICIT_DEF
+    MOV32mr $rip, 1, $noreg, 12, $noreg, $eax
+    RET 0, $eax
+...
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c24823538aa14..69a6cdb7081eb 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -67,10 +67,11 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    cmpq %rax, %rcx
 ; CHECK-NEXT:    jae LBB0_8
 ; CHECK-NEXT:  ## %bb.7: ## %for.body.lr.ph
+; CHECK-NEXT:    movq %rdi, %r14
+; CHECK-NEXT:    ## implicit-def: $rdi
 ; CHECK-NEXT:    movq %rdx, %rbx
 ; CHECK-NEXT:    movl $512, %edx ## imm = 0x200
 ; CHECK-NEXT:    movl $32, %esi
-; CHECK-NEXT:    movq %rdi, %r14
 ; CHECK-NEXT:    callq _memset
 ; CHECK-NEXT:    movq %r14, %rdi
 ; CHECK-NEXT:    movq %rbx, %rdx
@@ -156,6 +157,7 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    jne LBB0_31
 ; CHECK-NEXT:  ## %bb.30: ## %lor.rhs500
 ; CHECK-NEXT:    ## in Loop: Header=BB0_28 Depth=2
+; CHECK-NEXT:    ## implicit-def: $edi
 ; CHECK-NEXT:    movl $256, %esi ## imm = 0x100
 ; CHECK-NEXT:    callq ___maskrune
 ; CHECK-NEXT:    movb $1, %sil
@@ -258,6 +260,8 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
 ; CHECK-NEXT:  LBB0_47: ## %if.then1477
+; CHECK-NEXT:    ## implicit-def: $edi
+; CHECK-NEXT:    ## implicit-def: $rsi
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:    callq _write
 ; CHECK-NEXT:    subq %rbx, %r14
diff --git a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
index ea1ca51908134..9663ab995d64f 100644
--- a/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
+++ b/llvm/test/CodeGen/X86/regalloc-advanced-split-cost.ll
@@ -14,7 +14,7 @@
 ; The new code choses %ebp as the split candidate as it has lower spill cost.
 
 ; Make sure the split behaves as expected
-; CHECK: RS_Split Cascade 1
+; CHECK: RS_Split Cascade 0
 ; CHECK-NOT: $eax	static = 
 ; CHECK: $eax	no positive bundles
 ; CHECK-NEXT: $ecx	no positive bundles
diff --git a/llvm/test/CodeGen/X86/statepoint-two-results.ll b/llvm/test/CodeGen/X86/statepoint-two-results.ll
index 4993c292dc553..82467841910b2 100644
--- a/llvm/test/CodeGen/X86/statepoint-two-results.ll
+++ b/llvm/test/CodeGen/X86/statepoint-two-results.ll
@@ -8,6 +8,7 @@ define void @quux() gc "statepoint-example" {
 ; CHECK:       # %bb.0: # %bb1
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    # implicit-def: $rdi
 ; CHECK-NEXT:    movl $4, %esi
 ; CHECK-NEXT:    callq wombat at PLT
 ; CHECK-NEXT:  .Ltmp0:
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
index ef542e5b1427a..f08b388c3dc56 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-invoke.ll
@@ -183,6 +183,11 @@ define void @test_duplicate_ir_values() gc "statepoint-example" personality ptr
   ; CHECK-NEXT:   MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $rax :: (store (s64) into %stack.0)
   ; CHECK-NEXT:   EH_LABEL <mcsymbol >
   ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   dead $edi = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead $rsi = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead $edx = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead $ecx = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead $r8d = IMPLICIT_DEF
   ; CHECK-NEXT:   STATEPOINT 1, 16, 5, undef renamable $rax, undef $edi, undef $rsi, undef $edx, undef $ecx, undef $r8d, 2, 0, 2, 0, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (volatile load store (s64) on %stack.0)
   ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
   ; CHECK-NEXT:   EH_LABEL <mcsymbol >
diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
index 72e4fe410e269..f3e6d8b5489c3 100644
--- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll
@@ -109,6 +109,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB1_12
 ; CHECK-NEXT:  # %bb.2: # %if.end50
+; CHECK-NEXT:    # implicit-def: $rsi
 ; CHECK-NEXT:    movq %r14, %rdi
 ; CHECK-NEXT:    movq %r15, %rdx
 ; CHECK-NEXT:    callq memcpy at PLT
@@ -170,6 +171,7 @@ define i32 @loop_shared_header(ptr %exe, i32 %exesz, i32 %headsize, i32 %min, i3
 ; CHECK-NEXT:  .LBB1_11: # %if.then99.i
 ; CHECK-NEXT:    movq .str.6 at GOTPCREL(%rip), %rdi
 ; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    # implicit-def: $esi
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    callq cli_dbgmsg at PLT
 ; CHECK-NEXT:  .LBB1_12: # %cleanup

>From 4b74beaf1aa671a777ec3bb39b72d151b8c035bc Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Mon, 23 Mar 2026 16:33:07 -0500
Subject: [PATCH 02/11] Apply clang-format

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index a05d520678cc0..034a06e75de94 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -107,7 +107,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
   bool ImplicitDefIsDead = false;
   for (++UserMI; UserMI != UserE; ++UserMI) {
-    bool DefinesReg=false;
+    bool DefinesReg = false;
     for (MachineOperand &MO : UserMI->operands()) {
       if (!MO.isReg())
         continue;
@@ -115,10 +115,10 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
       if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
         continue;
       // UserMI uses or redefines Reg. Set <undef> flags on all uses.
-      if (!ImplicitDefIsDead && MO.isUse() )
+      if (!ImplicitDefIsDead && MO.isUse())
         MO.setIsUndef();
       if (MO.isDef())
-	DefinesReg = true;
+        DefinesReg = true;
     }
     if (DefinesReg) {
       ImplicitDefIsDead = true;
@@ -126,8 +126,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
     }
   }
 
-  // If we have added an undef flag to all uses (i.e. we have found a redefining MI or
-  // there are no successors), we can erase the IMPLICIT_DEF.
+  // If we have added an undef flag to all uses (i.e. we have found a redefining
+  // MI or there are no successors), we can erase the IMPLICIT_DEF.
   if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
     LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
     MI->eraseFromParent();

>From 8104f6e297ca6fdfcbf249ab3d98aca696624fc3 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 26 Mar 2026 09:58:00 -0500
Subject: [PATCH 03/11] Cleanup code, update comments, and ensure IMPLICIT_DEF
 is fully redefined.

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 034a06e75de94..5c5c0c0dfaf25 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -102,22 +102,23 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   }
 
   // This is a physreg implicit-def.
-  // Look for the first instruction to use or define an alias.
-  MachineBasicBlock::instr_iterator UserMI = MI->getIterator();
-  MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end();
+  // Try to add undef flag to all uses.  If all uses are updated remove
+  // implicit-def.
+  MachineBasicBlock::instr_iterator SearchMI = MI->getIterator();
+  MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end();
   bool ImplicitDefIsDead = false;
-  for (++UserMI; UserMI != UserE; ++UserMI) {
+  for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
     bool DefinesReg = false;
-    for (MachineOperand &MO : UserMI->operands()) {
+    for (MachineOperand &MO : SearchMI->operands()) {
       if (!MO.isReg())
         continue;
-      Register UserReg = MO.getReg();
-      if (!UserReg.isPhysical() || !TRI->regsOverlap(Reg, UserReg))
+      Register SearchReg = MO.getReg();
+      if (!SearchReg.isPhysical() || !TRI->regsOverlap(Reg, SearchReg))
         continue;
-      // UserMI uses or redefines Reg. Set <undef> flags on all uses.
-      if (!ImplicitDefIsDead && MO.isUse())
+      // SearchMI uses or redefines Reg. Set <undef> flags on all uses.
+      if (MO.isUse())
         MO.setIsUndef();
-      if (MO.isDef())
+      if (MO.isDef() && TRI->isSubRegisterEq(SearchReg, Reg))
         DefinesReg = true;
     }
     if (DefinesReg) {
@@ -129,7 +130,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   // If we have added an undef flag to all uses (i.e. we have found a redefining
   // MI or there are no successors), we can erase the IMPLICIT_DEF.
   if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
-    LLVM_DEBUG(dbgs() << "Physreg user: " << *UserMI);
+    if (ImplicitDefIsDead)
+      LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
     MI->eraseFromParent();
     return;
   }

>From 117307a4e48202ef8cc7fd278905581181a0f1a6 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 26 Mar 2026 10:40:43 -0500
Subject: [PATCH 04/11] Add search limit

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 5c5c0c0dfaf25..847c249dda65b 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -107,8 +107,15 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   MachineBasicBlock::instr_iterator SearchMI = MI->getIterator();
   MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end();
   bool ImplicitDefIsDead = false;
+  bool SearchedWholeBlock = true;
+  constexpr unsigned SearchLimit = 60000;
+  unsigned Count = 0;
   for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
     bool DefinesReg = false;
+    if (++Count > SearchLimit) {
+      SearchedWholeBlock = false;
+      break;
+    }
     for (MachineOperand &MO : SearchMI->operands()) {
       if (!MO.isReg())
         continue;
@@ -129,7 +136,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
 
   // If we have added an undef flag to all uses (i.e. we have found a redefining
   // MI or there are no successors), we can erase the IMPLICIT_DEF.
-  if (ImplicitDefIsDead || MI->getParent()->succ_empty()) {
+  if (ImplicitDefIsDead ||
+      (SearchedWholeBlock && MI->getParent()->succ_empty())) {
     if (ImplicitDefIsDead)
       LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
     MI->eraseFromParent();

>From 170a932913d01050349ce759066cb58cc0d9a5d4 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Thu, 26 Mar 2026 13:08:13 -0500
Subject: [PATCH 05/11] Autogen results

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/test/CodeGen/X86/multi-use-implicit-def.mir | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/X86/multi-use-implicit-def.mir b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
index d762e80792505..051157dc015bc 100644
--- a/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
+++ b/llvm/test/CodeGen/X86/multi-use-implicit-def.mir
@@ -1,14 +1,19 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
 # Ensure processing an IMPLICIT_DEF of a physreg updates all uses
 # before removing the IMPLICIT_DEF.  -verify-machineinstrs will
 # fail otherwise.
 
-# RUN: llc -x mir < %s -verify-machineinstrs | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -x mir -stop-after processimpdefs < %s -verify-machineinstrs | FileCheck %s
 ---
-# CHECK-LABEL: implicit_def:
 name: implicit_def
 tracksRegLiveness: true
 body: |
   bb.0.entry:
+    ; CHECK-LABEL: name: implicit_def
+    ; CHECK: MOV32mr $rip, 1, $noreg, 12, $noreg, undef $eax
+    ; CHECK-NEXT: RET 0, undef $eax
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: .1.entry:
     $eax = IMPLICIT_DEF
     MOV32mr $rip, 1, $noreg, 12, $noreg, $eax
     RET 0, $eax

>From b4fb9e50064ea649c77a66c60150adfaae3dc4db Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 27 Mar 2026 09:32:24 -0500
Subject: [PATCH 06/11] Simplify logic based on feedback

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 847c249dda65b..3c431b8c10bbe 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -111,7 +111,6 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   constexpr unsigned SearchLimit = 60000;
   unsigned Count = 0;
   for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
-    bool DefinesReg = false;
     if (++Count > SearchLimit) {
       SearchedWholeBlock = false;
       break;
@@ -126,10 +125,10 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
       if (MO.isUse())
         MO.setIsUndef();
       if (MO.isDef() && TRI->isSubRegisterEq(SearchReg, Reg))
-        DefinesReg = true;
+        ImplicitDefIsDead = true;
     }
-    if (DefinesReg) {
-      ImplicitDefIsDead = true;
+    if (ImplicitDefIsDead) {
+      LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
       break;
     }
   }
@@ -138,8 +137,6 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   // MI or there are no successors), we can erase the IMPLICIT_DEF.
   if (ImplicitDefIsDead ||
       (SearchedWholeBlock && MI->getParent()->succ_empty())) {
-    if (ImplicitDefIsDead)
-      LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
     MI->eraseFromParent();
     return;
   }

>From 8ba7ecda66bc3fc5d08c68e8f99545ff7e362bd9 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 27 Mar 2026 10:24:43 -0500
Subject: [PATCH 07/11] Reduce search limit

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp       |  2 +-
 ...ine-sink-temporal-divergence-swdev407790.ll |  2 ++
 llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll     |  2 ++
 .../AMDGPU/promote-constOffset-to-imm.ll       | 18 ++++++++++++++++++
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 3c431b8c10bbe..da1c86f3f974c 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -108,7 +108,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   MachineBasicBlock::instr_iterator SearchE = MI->getParent()->instr_end();
   bool ImplicitDefIsDead = false;
   bool SearchedWholeBlock = true;
-  constexpr unsigned SearchLimit = 60000;
+  constexpr unsigned SearchLimit = 35;
   unsigned Count = 0;
   for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
     if (++Count > SearchLimit) {
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 951bf1e6eeac8..15f2dc086bdd1 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -77,6 +77,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
 ; CHECK-NEXT:    s_mov_b32 s12, s51
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_mov_b32_e32 v41, v0
 ; CHECK-NEXT:    v_mov_b32_e32 v31, v40
@@ -831,6 +832,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
 ; CHECK-NEXT:    s_mov_b32 s12, s51
 ; CHECK-NEXT:    s_mov_b32 s13, s50
 ; CHECK-NEXT:    s_mov_b32 s14, s33
+; CHECK-NEXT:    ; implicit-def: $sgpr15
 ; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; CHECK-NEXT:    v_mul_lo_u32 v44, v0, 14
 ; CHECK-NEXT:    v_mov_b32_e32 v31, v40
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index 22b9a9144c68d..263298d1cd1c8 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -402,6 +402,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr add
 ; GFX908:       ; %bb.0: ; %bb
 ; GFX908-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; GFX908-NEXT:    v_mov_b32_e32 v32, 0
+; GFX908-NEXT:    ; implicit-def: $agpr100_agpr101_agpr102_agpr103_agpr104_agpr105_agpr106_agpr107_agpr108_agpr109_agpr110_agpr111_agpr112_agpr113_agpr114_agpr115_agpr116_agpr117_agpr118_agpr119_agpr120_agpr121_agpr122_agpr123_agpr124_agpr125_agpr126_agpr127_agpr128_agpr129_agpr130_agpr131
 ; GFX908-NEXT:    ;;#ASMSTART
 ; GFX908-NEXT:    ; use a[100:131]
 ; GFX908-NEXT:    ;;#ASMEND
@@ -638,6 +639,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(ptr addrspace(1) %arg)
 ; GFX908-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX908-NEXT:    s_mov_b32 s32, 0
 ; GFX908-NEXT:    v_mov_b32_e32 v40, 0
+; GFX908-NEXT:    ; implicit-def: $sgpr15
 ; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX908-NEXT:    s_swappc_b64 s[30:31], s[16:17]
 ; GFX908-NEXT:    global_load_dwordx4 v[28:31], v40, s[34:35] offset:112
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index 7d140844278bb..6084381da84f9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -27,6 +27,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, 0
+; GFX8-NEXT:    ; implicit-def: $sgpr15
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -108,6 +109,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1)  %buffer) {
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -178,6 +180,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1)  %buffer) {
 ; GFX10-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX10-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX10-NEXT:    s_mov_b32 s32, 0
+; GFX10-NEXT:    ; implicit-def: $sgpr15
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -241,6 +244,7 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1)  %buffer) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_load_b64 s[34:35], s[4:5], 0x24
 ; GFX11-NEXT:    s_mov_b32 s32, 0
+; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -1041,6 +1045,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, 0
+; GFX8-NEXT:    ; implicit-def: $sgpr15
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -1127,6 +1132,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -1184,6 +1190,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX10-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX10-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX10-NEXT:    s_mov_b32 s32, 0
+; GFX10-NEXT:    ; implicit-def: $sgpr15
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -1241,6 +1248,7 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_load_b64 s[34:35], s[4:5], 0x24
 ; GFX11-NEXT:    s_mov_b32 s32, 0
+; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -1358,6 +1366,7 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, 0
+; GFX8-NEXT:    ; implicit-def: $sgpr15
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -1583,6 +1592,7 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1)  %buffer) {
 ; GFX8-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, 0
+; GFX8-NEXT:    ; implicit-def: $sgpr15
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -1791,6 +1801,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
 ; GFX8-NEXT:    s_mov_b64 s[2:3], s[50:51]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, 0
+; GFX8-NEXT:    ; implicit-def: $sgpr15
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
@@ -1854,6 +1865,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[50:51]
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
@@ -1913,6 +1925,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
 ; GFX10-NEXT:    s_mov_b64 s[0:1], s[48:49]
 ; GFX10-NEXT:    s_mov_b64 s[2:3], s[50:51]
 ; GFX10-NEXT:    s_mov_b32 s32, 0
+; GFX10-NEXT:    ; implicit-def: $sgpr15
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
@@ -1963,6 +1976,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1,
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_load_b128 s[36:39], s[4:5], 0x24
 ; GFX11-NEXT:    s_mov_b32 s32, 0
+; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
@@ -2063,6 +2077,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX8-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX8-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, 0
+; GFX8-NEXT:    ; implicit-def: $sgpr15
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -2144,6 +2159,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    s_mov_b32 s32, 0
+; GFX9-NEXT:    ; implicit-def: $sgpr15
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -2213,6 +2229,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX10-NEXT:    s_mov_b64 s[0:1], s[36:37]
 ; GFX10-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX10-NEXT:    s_mov_b32 s32, 0
+; GFX10-NEXT:    ; implicit-def: $sgpr15
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 7, v0
@@ -2280,6 +2297,7 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) {
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 ; GFX11-NEXT:    s_load_b64 s[34:35], s[4:5], 0x24
 ; GFX11-NEXT:    s_mov_b32 s32, 0
+; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 7, v0

>From d709f75bca25b5b0926b06f68f0c83f09f9288b3 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 27 Mar 2026 13:35:03 -0500
Subject: [PATCH 08/11] Cannot add undef to uses that are not fully undef

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp      | 33 +++++++++------
 .../CodeGen/AMDGPU/subreg-implicit-def.mir    | 42 +++++++++++++++++++
 2 files changed, 63 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index da1c86f3f974c..161fc558d0ebe 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -102,6 +102,10 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   }
 
   // This is a physreg implicit-def.
+  // Trim any extra operands.
+  for (unsigned i = MI->getNumOperands() - 1; i; --i)
+    MI->removeOperand(i);
+
   // Try to add undef flag to all uses.  If all uses are updated remove
   // implicit-def.
   MachineBasicBlock::instr_iterator SearchMI = MI->getIterator();
@@ -122,13 +126,24 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
       if (!SearchReg.isPhysical() || !TRI->regsOverlap(Reg, SearchReg))
         continue;
       // SearchMI uses or redefines Reg. Set <undef> flags on all uses.
-      if (MO.isUse())
-        MO.setIsUndef();
-      if (MO.isDef() && TRI->isSubRegisterEq(SearchReg, Reg))
-        ImplicitDefIsDead = true;
+      if (MO.isUse()) {
+        if (TRI->isSubRegisterEq(Reg, SearchReg))
+          MO.setIsUndef();
+        else
+          // Use is larger than Reg.  It is not safe to add undef to this use.
+          return;
+      }
+      if (MO.isDef()) {
+        if (TRI->isSubRegisterEq(SearchReg, Reg))
+          ImplicitDefIsDead = true;
+        else
+          // Reg is larger than definition.  It is not safe to add undef to any
+          // subsequent uses of Reg.
+          return;
+      }
     }
     if (ImplicitDefIsDead) {
-      LLVM_DEBUG(dbgs() << "Physreg def: " << *SearchMI);
+      LLVM_DEBUG(dbgs() << "Physreg redefine: " << *SearchMI);
       break;
     }
   }
@@ -138,14 +153,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   if (ImplicitDefIsDead ||
       (SearchedWholeBlock && MI->getParent()->succ_empty())) {
     MI->eraseFromParent();
-    return;
+    LLVM_DEBUG(dbgs() << "Deleting implicit-def: " << *MI);
   }
-
-  // Using instr wasn't found, it could be in another block.
-  // Leave the physreg IMPLICIT_DEF, but trim any extra operands.
-  for (unsigned i = MI->getNumOperands() - 1; i; --i)
-    MI->removeOperand(i);
-  LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI);
 }
 
 bool ProcessImplicitDefsLegacy::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir
new file mode 100644
index 0000000000000..9eb52fe4f82a0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/subreg-implicit-def.mir
@@ -0,0 +1,42 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+
+# Ensure processing an IMPLICIT_DEF of a physreg handles subreg definitions
+# and super-reg uses correctly.
+
+
+# RUN: llc -mtriple=amdgcn -mcpu=gfx802  -run-pass processimpdefs -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck %s
+
+# Must not add undef to use of implicit-def because def is partially redefined.
+---
+name: impdef_subreg_def
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: impdef_subreg_def
+    ; CHECK: $sgpr0_sgpr1 = IMPLICIT_DEF
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0
+    ; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: S_ENDPGM 0
+    $sgpr0_sgpr1 = IMPLICIT_DEF
+    $sgpr0 = S_MOV_B32 0
+    $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+    S_ENDPGM 0
+...
+
+# Must not add undef to use of implicit-def because use is larger than implicit definition.
+---
+name: impdef_superreg_use
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: impdef_superreg_use
+    ; CHECK: $sgpr0 = IMPLICIT_DEF
+    ; CHECK-NEXT: $sgpr1 = S_MOV_B32 0
+    ; CHECK-NEXT: $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: S_ENDPGM 0
+    $sgpr0 = IMPLICIT_DEF
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = COPY $sgpr0_sgpr1
+    S_ENDPGM 0
+...
+

>From a1fafa0aec759756ab9e5623769dfee4c0fdb416 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Tue, 31 Mar 2026 10:32:10 -0500
Subject: [PATCH 09/11] Skip debug instructions

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 161fc558d0ebe..15fbebd3d1353 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -115,6 +115,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
   constexpr unsigned SearchLimit = 35;
   unsigned Count = 0;
   for (++SearchMI; SearchMI != SearchE; ++SearchMI) {
+    if (SearchMI->isDebugInstr())
+      continue;
     if (++Count > SearchLimit) {
       SearchedWholeBlock = false;
       break;

>From 96b5b06266aea7a23af745dd850660e929bad7d5 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Tue, 31 Mar 2026 11:41:13 -0500
Subject: [PATCH 10/11] Add braces

Signed-off-by: John Lu <John.Lu at amd.com>
---
 llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 15fbebd3d1353..5fc765d90f6e4 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -129,19 +129,21 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
         continue;
       // SearchMI uses or redefines Reg. Set <undef> flags on all uses.
       if (MO.isUse()) {
-        if (TRI->isSubRegisterEq(Reg, SearchReg))
+        if (TRI->isSubRegisterEq(Reg, SearchReg)) {
           MO.setIsUndef();
-        else
+        } else {
           // Use is larger than Reg.  It is not safe to add undef to this use.
           return;
+        }
       }
       if (MO.isDef()) {
-        if (TRI->isSubRegisterEq(SearchReg, Reg))
+        if (TRI->isSubRegisterEq(SearchReg, Reg)) {
           ImplicitDefIsDead = true;
-        else
+        } else {
           // Reg is larger than definition.  It is not safe to add undef to any
           // subsequent uses of Reg.
           return;
+        }
       }
     }
     if (ImplicitDefIsDead) {

>From 8993c705a0436f0f0518b4fb5e6cacb5fe57ef8c Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Wed, 1 Apr 2026 07:53:45 -0500
Subject: [PATCH 11/11] Update tests

Signed-off-by: John Lu <John.Lu at amd.com>
---
 .../CodeGen/AMDGPU/insert-delay-alu-bug.ll    | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index 880cfbf12b931..7d8a0b70d5f55 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -57,7 +57,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11:       ; %bb.0: ; %bb
 ; GFX11-NEXT:    s_mov_b64 s[18:19], s[4:5]
 ; GFX11-NEXT:    v_mov_b32_e32 v31, v0
-; GFX11-NEXT:    s_load_b32 s24, s[18:19], 0x24
+; GFX11-NEXT:    s_load_b32 s25, s[18:19], 0x24
+; GFX11-NEXT:    s_mov_b32 s17, s15
 ; GFX11-NEXT:    s_mov_b32 s12, s13
 ; GFX11-NEXT:    s_mov_b64 s[10:11], s[6:7]
 ; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
@@ -65,10 +66,10 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_mov_b64 s[4:5], s[0:1]
 ; GFX11-NEXT:    s_mov_b32 s20, 0
 ; GFX11-NEXT:    s_mov_b32 s0, -1
-; GFX11-NEXT:    s_mov_b32 s17, exec_lo
+; GFX11-NEXT:    s_mov_b32 s24, exec_lo
 ; GFX11-NEXT:    s_mov_b32 s32, 0
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_mul_lo_u32 v0, s24, v0
+; GFX11-NEXT:    v_mul_lo_u32 v0, s25, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
 ; GFX11-NEXT:    s_cbranch_execz .LBB2_13
@@ -76,7 +77,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_load_b128 s[20:23], s[18:19], 0x2c
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_bitcmp1_b32 s21, 0
-; GFX11-NEXT:    s_cselect_b32 s25, -1, 0
+; GFX11-NEXT:    s_cselect_b32 s26, -1, 0
 ; GFX11-NEXT:    s_bitcmp0_b32 s21, 0
 ; GFX11-NEXT:    s_mov_b32 s21, 0
 ; GFX11-NEXT:    s_cbranch_scc0 .LBB2_3
@@ -89,11 +90,11 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_mov_b32 s13, s14
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT:    s_mov_b32 s26, s14
-; GFX11-NEXT:    s_mov_b32 s14, s15
+; GFX11-NEXT:    s_mov_b32 s15, s14
+; GFX11-NEXT:    s_mov_b32 s14, s17
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT:    s_mov_b32 s14, s26
+; GFX11-NEXT:    s_mov_b32 s14, s15
 ; GFX11-NEXT:    s_mov_b32 s2, -1
 ; GFX11-NEXT:    s_cbranch_execz .LBB2_4
 ; GFX11-NEXT:    s_branch .LBB2_12
@@ -127,11 +128,11 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_mul_i32 s0, s0, s22
 ; GFX11-NEXT:    s_mul_i32 s0, s0, s20
 ; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    s_or_b32 s0, s24, s0
+; GFX11-NEXT:    s_or_b32 s0, s25, s0
 ; GFX11-NEXT:    s_lshl_b64 s[22:23], s[0:1], 1
 ; GFX11-NEXT:    s_mov_b32 s0, s1
 ; GFX11-NEXT:    global_load_u16 v1, v0, s[22:23]
-; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s25
+; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s26
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
 ; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
@@ -152,7 +153,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_and_b32 s1, s1, 1
 ; GFX11-NEXT:    s_and_b32 s9, 0xffff, s0
 ; GFX11-NEXT:    s_cselect_b32 s9, -1, 0
-; GFX11-NEXT:    s_and_b32 s16, s8, exec_lo
+; GFX11-NEXT:    s_and_b32 s15, s8, exec_lo
 ; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s9
 ; GFX11-NEXT:    v_readfirstlane_b32 s9, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
@@ -182,8 +183,8 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_and_b32 s20, s2, exec_lo
 ; GFX11-NEXT:    s_or_not1_b32 s0, s21, exec_lo
 ; GFX11-NEXT:  .LBB2_13: ; %Flow9
-; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s17
-; GFX11-NEXT:    s_and_saveexec_b32 s17, s0
+; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s24
+; GFX11-NEXT:    s_and_saveexec_b32 s21, s0
 ; GFX11-NEXT:    s_cbranch_execz .LBB2_15
 ; GFX11-NEXT:  ; %bb.14: ; %bb43
 ; GFX11-NEXT:    s_add_u32 s8, s18, 0x58
@@ -193,13 +194,13 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
 ; GFX11-NEXT:    s_addc_u32 s1, s1, f0 at gotpcrel32@hi+12
 ; GFX11-NEXT:    s_mov_b32 s13, s14
 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT:    s_mov_b32 s14, s18
+; GFX11-NEXT:    s_mov_b32 s14, s17
 ; GFX11-NEXT:    ; implicit-def: $sgpr15
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 ; GFX11-NEXT:    s_or_b32 s20, s20, exec_lo
 ; GFX11-NEXT:  .LBB2_15: ; %Flow14
-; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s17
+; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s21
 ; GFX11-NEXT:    s_and_saveexec_b32 s0, s20
 ; GFX11-NEXT:  ; %bb.16: ; %UnifiedUnreachableBlock
 ; GFX11-NEXT:    ; divergent unreachable



More information about the llvm-commits mailing list